GCC Code Coverage Report


Directory: ./
File: strings/ctype-uca.cc
Date: 2022-12-13 11:44:05
Exec Total Coverage
Lines: 1624 1782 91.1%
Branches: 2618 9044 28.9%

Line Branch Exec Source
1 /* Copyright (c) 2004, 2022, Oracle and/or its affiliates.
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public
5 License, version 2.0, as published by the Free Software Foundation.
6
7 This library is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the library and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 Without limiting anything contained in the foregoing, this file,
15 which is part of C Driver for MySQL (Connector/C), is also subject to the
16 Universal FOSS Exception, version 1.0, a copy of which can be found at
17 http://oss.oracle.com/licenses/universal-foss-exception.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Library General Public License, version 2.0, for more details.
23
24 You should have received a copy of the GNU Library General Public
25 License along with this library; if not, write to the Free
26 Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
27 MA 02110-1301 USA */
28
29 /*
30 UCA (Unicode Collation Algorithm) support.
31
32 Features that are not implemented yet:
33 - No Normalization Form D is done
34 + No decomposition is done
35 + No Thai/Lao ordering is done
36 - No combining marks processing is done
37 */
38
39 #include <assert.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <sys/types.h>
43 #include <algorithm>
44 #include <bitset>
45 #include <iterator>
46 #include <map>
47 #include <utility>
48
49 #include "m_ctype.h"
50 #include "m_string.h"
51 #include "my_byteorder.h"
52 #include "my_compiler.h"
53
54 #include "my_inttypes.h"
55 #include "my_loglevel.h"
56 #include "my_macros.h"
57 #include "mysys_err.h"
58 #include "strings/mb_wc.h"
59 #include "strings/str_uca_type.h"
60 #include "strings/uca900_data.h"
61 #include "strings/uca900_ja_data.h"
62 #include "strings/uca900_zh_data.h"
63 #include "strings/uca_data.h"
64 #include "template_utils.h"
65
66 MY_UCA_INFO my_uca_v400 = {
67 UCA_V400,
68
69 0xFFFF, /* maxchar */
70 uca_length, uca_weight, false, nullptr, /* contractions */
71 nullptr,
72
73 /* Logical positions */
74 0x0009, /* first_non_ignorable p != ignore */
75 0xA48C, /* last_non_ignorable Not a CJK and not UNASSIGNED */
76
77 0x0332, /* first_primary_ignorable p == 0 */
78 0x20EA, /* last_primary_ignorable */
79
80 0x0000, /* first_secondary_ignorable p,s == 0 */
81 0xFE73, /* last_secondary_ignorable p,s == 0 */
82
83 0x0000, /* first_tertiary_ignorable p,s,t == 0 */
84 0xFE73, /* last_tertiary_ignorable p,s,t == 0 */
85
86 0x0000, /* first_trailing */
87 0x0000, /* last_trailing */
88
89 0x0009, /* first_variable */
90 0x2183, /* last_variable */
91 0, /* extra_ce_pri_base, not used */
92 0, /* extra_ce_sec_base, not used */
93 0 /* extra_ce_ter_base, not used */
94 };
95
96 /******************************************************/
97
98 MY_UCA_INFO my_uca_v520 = {
99 UCA_V520,
100
101 0x10FFFF, /* maxchar */
102 uca520_length,
103 uca520_weight,
104 false,
105 nullptr, /* contractions */
106 nullptr,
107
108 0x0009, /* first_non_ignorable p != ignore */
109 0x1342E, /* last_non_ignorable Not a CJK and not UNASSIGNED */
110
111 0x0332, /* first_primary_ignorable p == ignore */
112 0x101FD, /* last_primary_ignorable */
113
114 0x0000, /* first_secondary_ignorable p,s= ignore */
115 0xFE73, /* last_secondary_ignorable */
116
117 0x0000, /* first_tertiary_ignorable p,s,t == ignore */
118 0xFE73, /* last_tertiary_ignorable */
119
120 0x0000, /* first_trailing */
121 0x0000, /* last_trailing */
122
123 0x0009, /* first_variable if alt=non-ignorable: p != ignore */
124 0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
125 0, /* extra_ce_pri_base, not used */
126 0, /* extra_ce_sec_base, not used */
127 0 /* extra_ce_ter_base, not used */
128 };
129
130 /******************************************************/
131
132 /*
133 German Phonebook
134 */
135 static const char german2[] =
136 "&AE << \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
137 "&OE << \\u0153 <<< \\u0152 << \\u00F6 <<< \\u00D6 "
138 "&UE << \\u00FC <<< \\u00DC ";
139
140 /*
141 Some sources treat LETTER A WITH DIAERESIS (00E4,00C4)
142 secondary greater than LETTER AE (00E6,00C6).
143 http://www.evertype.com/alphabets/icelandic.pdf
144 http://developer.mimer.com/collations/charts/icelandic.htm
145
146 Other sources do not provide any special rules
147 for LETTER A WITH DIAERESIS:
148 http://www.omniglot.com/writing/icelandic.htm
149 http://en.wikipedia.org/wiki/Icelandic_alphabet
150 http://oss.software.ibm.com/icu/charts/collation/is.html
151
152 Let's go the first way.
153 */
154
155 static const char icelandic[] =
156 "& A < \\u00E1 <<< \\u00C1 "
157 "& D < \\u00F0 <<< \\u00D0 "
158 "& E < \\u00E9 <<< \\u00C9 "
159 "& I < \\u00ED <<< \\u00CD "
160 "& O < \\u00F3 <<< \\u00D3 "
161 "& U < \\u00FA <<< \\u00DA "
162 "& Y < \\u00FD <<< \\u00DD "
163 "& Z < \\u00FE <<< \\u00DE "
164 "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
165 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
166 "< \\u00E5 <<< \\u00C5 ";
167
168 /*
169 Some sources treat I and Y primary different.
170 Other sources treat I and Y the same on primary level.
171 We'll go the first way.
172 */
173
174 static const char latvian[] =
175 "& C < \\u010D <<< \\u010C "
176 "& G < \\u0123 <<< \\u0122 "
177 "& I < \\u0079 <<< \\u0059 "
178 "& K < \\u0137 <<< \\u0136 "
179 "& L < \\u013C <<< \\u013B "
180 "& N < \\u0146 <<< \\u0145 "
181 "& R < \\u0157 <<< \\u0156 "
182 "& S < \\u0161 <<< \\u0160 "
183 "& Z < \\u017E <<< \\u017D ";
184
185 static const char romanian[] =
186 "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
187 "& I < \\u00EE <<< \\u00CE "
188 "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E "
189 "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 ";
190
191 static const char slovenian[] =
192 "& C < \\u010D <<< \\u010C "
193 "& S < \\u0161 <<< \\u0160 "
194 "& Z < \\u017E <<< \\u017D ";
195
196 static const char polish[] =
197 "& A < \\u0105 <<< \\u0104 "
198 "& C < \\u0107 <<< \\u0106 "
199 "& E < \\u0119 <<< \\u0118 "
200 "& L < \\u0142 <<< \\u0141 "
201 "& N < \\u0144 <<< \\u0143 "
202 "& O < \\u00F3 <<< \\u00D3 "
203 "& S < \\u015B <<< \\u015A "
204 "& Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B";
205
206 static const char estonian[] =
207 "& S < \\u0161 <<< \\u0160 "
208 " < \\u007A <<< \\u005A "
209 " < \\u017E <<< \\u017D "
210 "& W < \\u00F5 <<< \\u00D5 "
211 "< \\u00E4 <<< \\u00C4 "
212 "< \\u00F6 <<< \\u00D6 "
213 "< \\u00FC <<< \\u00DC ";
214
215 // Standard Spanish, also for Galician.
216 static const char spanish[] = "& N < \\u00F1 <<< \\u00D1 ";
217
218 /*
219 Some sources treat V and W as similar on primary level.
220 We'll treat V and W as different on primary level.
221 */
222
223 static const char swedish[] =
224 "& Y <<\\u00FC <<< \\u00DC "
225 "& Z < \\u00E5 <<< \\u00C5 "
226 "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 "
227 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 ";
228
229 static const char turkish[] =
230 "& C < \\u00E7 <<< \\u00C7 "
231 "& G < \\u011F <<< \\u011E "
232 "& H < \\u0131 <<< \\u0049 "
233 "& O < \\u00F6 <<< \\u00D6 "
234 "& S < \\u015F <<< \\u015E "
235 "& U < \\u00FC <<< \\u00DC ";
236
237 static const char czech[] =
238 "& C < \\u010D <<< \\u010C "
239 "& H < ch <<< Ch <<< CH"
240 "& R < \\u0159 <<< \\u0158"
241 "& S < \\u0161 <<< \\u0160"
242 "& Z < \\u017E <<< \\u017D";
243
244 static const char danish[] = /* Also good for Norwegian */
245 "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170"
246 "& Z < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4"
247 " < \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
248 " < \\u00E5 <<< \\u00C5 << aa <<< Aa <<< AA";
249
250 static const char lithuanian[] =
251 "& C << ch <<< Ch <<< CH< \\u010D <<< \\u010C"
252 "& E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116"
253 "& I << y <<< Y"
254 "& S < \\u0161 <<< \\u0160"
255 "& Z < \\u017E <<< \\u017D";
256
257 static const char slovak[] =
258 "& A < \\u00E4 <<< \\u00C4"
259 "& C < \\u010D <<< \\u010C"
260 "& H < ch <<< Ch <<< CH"
261 "& O < \\u00F4 <<< \\u00D4"
262 "& S < \\u0161 <<< \\u0160"
263 "& Z < \\u017E <<< \\u017D";
264
265 static const char spanish2[] = /* Also good for Asturian and Galician */
266 "&C < ch <<< Ch <<< CH"
267 "&L < ll <<< Ll <<< LL"
268 "&N < \\u00F1 <<< \\u00D1";
269
270 static const char roman[] = /* i.e. Classical Latin */
271 "& I << j <<< J "
272 "& V << u <<< U ";
273
274 /*
275 Persian collation support was provided by
276 Jody McIntyre <mysql@modernduck.com>
277
278 To: internals@lists.mysql.com
279 Subject: Persian UTF8 collation support
280 Date: 17.08.2004
281
282 Contraction is not implemented. Some implementations do perform
283 contraction but others do not, and it is able to sort all my test
284 strings correctly.
285
286 Jody.
287 */
288 static const char persian[] =
289 "& \\u066D < \\u064E < \\uFE76 < \\uFE77 < \\u0650 < \\uFE7A < \\uFE7B"
290 " < \\u064F < \\uFE78 < \\uFE79 < \\u064B < \\uFE70 < \\uFE71"
291 " < \\u064D < \\uFE74 < \\u064C < \\uFE72"
292 "& \\uFE7F < \\u0653 < \\u0654 < \\u0655 < \\u0670"
293 "& \\u0669 < \\u0622 < \\u0627 < \\u0671 < \\u0621 < \\u0623 < \\u0625"
294 " < \\u0624 < \\u0626"
295 "& \\u0642 < \\u06A9 < \\u0643"
296 "& \\u0648 < \\u0647 < \\u0629 < \\u06C0 < \\u06CC < \\u0649 < \\u064A"
297 "& \\uFE80 < \\uFE81 < \\uFE82 < \\uFE8D < \\uFE8E < \\uFB50 < \\uFB51"
298 " < \\uFE80 "
299 /*
300 FE80 appears both in reset and shift.
301 We need to break the rule here and reset to *new* FE80 again,
302 so weight for FE83 is calculated as P[FE80]+1, not as P[FE80]+8.
303 */
304 " & \\uFE80 < \\uFE83 < \\uFE84 < \\uFE87 < \\uFE88 < \\uFE85"
305 " < \\uFE86 < \\u0689 < \\u068A"
306 "& \\uFEAE < \\uFDFC"
307 "& \\uFED8 < \\uFB8E < \\uFB8F < \\uFB90 < \\uFB91 < \\uFED9 < \\uFEDA"
308 " < \\uFEDB < \\uFEDC"
309 "& \\uFEEE < \\uFEE9 < \\uFEEA < \\uFEEB < \\uFEEC < \\uFE93 < \\uFE94"
310 " < \\uFBA4 < \\uFBA5 < \\uFBFC < \\uFBFD < \\uFBFE < \\uFBFF"
311 " < \\uFEEF < \\uFEF0 < \\uFEF1 < \\uFEF2 < \\uFEF3 < \\uFEF4"
312 " < \\uFEF5 < \\uFEF6 < \\uFEF7 < \\uFEF8 < \\uFEF9 < \\uFEFA"
313 " < \\uFEFB < \\uFEFC";
314
315 /*
316 Esperanto tailoring.
317 Contributed by Bertilo Wennergren <bertilow at gmail dot com>
318 September 1, 2005
319 */
320 static const char esperanto[] =
321 "& C < \\u0109 <<< \\u0108"
322 "& G < \\u011D <<< \\u011C"
323 "& H < \\u0125 <<< \\u0124"
324 "& J < \\u0135 <<< \\u0134"
325 "& S < \\u015d <<< \\u015c"
326 "& U < \\u016d <<< \\u016c";
327
328 /*
329 A simplified version of Hungarian, without consonant contractions.
330 */
331 static const char hungarian[] =
332 "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
333 "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170";
334
335 static const char croatian[] =
336 "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106"
337 "&D < d\\u017E = \\u01C6 <<< d\\u017D <<< D\\u017E = \\u01C5 <<< D\\u017D "
338 "= \\u01C4"
339 " < \\u0111 <<< \\u0110"
340 "&L < lj = \\u01C9 <<< lJ <<< Lj = \\u01C8 <<< LJ = \\u01C7"
341 "&N < nj = \\u01CC <<< nJ <<< Nj = \\u01CB <<< NJ = \\u01CA"
342 "&S < \\u0161 <<< \\u0160"
343 "&Z < \\u017E <<< \\u017D";
344
345 /*
346 SCCII Part 1 : Collation Sequence (SLS1134)
347 2006/11/24
348 Harshula Jayasuriya <harshula at gmail dot com>
349 Language Technology Research Lab, University of Colombo / ICTA
350 */
351 #if 0
352 static const char sinhala[]=
353 "& \\u0D96 < \\u0D82 < \\u0D83"
354 "& \\u0DA5 < \\u0DA4"
355 "& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3"
356 "& \\u0DDE < \\u0DCA";
357 #else
358 static const char sinhala[] =
359 "& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C < \\u0D9D"
360 "< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 < \\u0DA3"
361 "< \\u0DA5 < \\u0DA4 < \\u0DA6"
362 "< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB < \\u0DAC"
363 "< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1"
364 "< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 < \\u0DB8"
365 "< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 < \\u0DC1"
366 "< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6"
367 "< \\u0DCF"
368 "< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 < \\u0DD6"
369 "< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 < \\u0DDA"
370 "< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA";
371 #endif
372
373 static const char vietnamese[] =
374 " &A << \\u00E0 <<< \\u00C0" /* A */
375 " << \\u1EA3 <<< \\u1EA2"
376 " << \\u00E3 <<< \\u00C3"
377 " << \\u00E1 <<< \\u00C1"
378 " << \\u1EA1 <<< \\u1EA0"
379 " < \\u0103 <<< \\u0102" /* A WITH BREVE */
380 " << \\u1EB1 <<< \\u1EB0"
381 " << \\u1EB3 <<< \\u1EB2"
382 " << \\u1EB5 <<< \\u1EB4"
383 " << \\u1EAF <<< \\u1EAE"
384 " << \\u1EB7 <<< \\u1EB6"
385 " < \\u00E2 <<< \\u00C2" /* A WITH CIRCUMFLEX */
386 " << \\u1EA7 <<< \\u1EA6"
387 " << \\u1EA9 <<< \\u1EA8"
388 " << \\u1EAB <<< \\u1EAA"
389 " << \\u1EA5 <<< \\u1EA4"
390 " << \\u1EAD <<< \\u1EAC"
391 " &D < \\u0111 <<< \\u0110" /* D WITH STROKE */
392 " &E << \\u00E8 <<< \\u00C8" /* E */
393 " << \\u1EBB <<< \\u1EBA"
394 " << \\u1EBD <<< \\u1EBC"
395 " << \\u00E9 <<< \\u00C9"
396 " << \\u1EB9 <<< \\u1EB8"
397 " < \\u00EA <<< \\u00CA" /* E WITH CIRCUMFLEX */
398 " << \\u1EC1 <<< \\u1EC0"
399 " << \\u1EC3 <<< \\u1EC2"
400 " << \\u1EC5 <<< \\u1EC4"
401 " << \\u1EBF <<< \\u1EBE"
402 " << \\u1EC7 <<< \\u1EC6"
403 " &I << \\u00EC <<< \\u00CC" /* I */
404 " << \\u1EC9 <<< \\u1EC8"
405 " << \\u0129 <<< \\u0128"
406 " << \\u00ED <<< \\u00CD"
407 " << \\u1ECB <<< \\u1ECA"
408 " &O << \\u00F2 <<< \\u00D2" /* O */
409 " << \\u1ECF <<< \\u1ECE"
410 " << \\u00F5 <<< \\u00D5"
411 " << \\u00F3 <<< \\u00D3"
412 " << \\u1ECD <<< \\u1ECC"
413 " < \\u00F4 <<< \\u00D4" /* O WITH CIRCUMFLEX */
414 " << \\u1ED3 <<< \\u1ED2"
415 " << \\u1ED5 <<< \\u1ED4"
416 " << \\u1ED7 <<< \\u1ED6"
417 " << \\u1ED1 <<< \\u1ED0"
418 " << \\u1ED9 <<< \\u1ED8"
419 " < \\u01A1 <<< \\u01A0" /* O WITH HORN */
420 " << \\u1EDD <<< \\u1EDC"
421 " << \\u1EDF <<< \\u1EDE"
422 " << \\u1EE1 <<< \\u1EE0"
423 " << \\u1EDB <<< \\u1EDA"
424 " << \\u1EE3 <<< \\u1EE2"
425 " &U << \\u00F9 <<< \\u00D9" /* U */
426 " << \\u1EE7 <<< \\u1EE6"
427 " << \\u0169 <<< \\u0168"
428 " << \\u00FA <<< \\u00DA"
429 " << \\u1EE5 <<< \\u1EE4"
430 " < \\u01B0 <<< \\u01AF" /* U WITH HORN */
431 " << \\u1EEB <<< \\u1EEA"
432 " << \\u1EED <<< \\u1EEC"
433 " << \\u1EEF <<< \\u1EEE"
434 " << \\u1EE9 <<< \\u1EE8"
435 " << \\u1EF1 <<< \\u1EF0"
436 " &Y << \\u1EF3 <<< \\u1EF2" /* Y */
437 " << \\u1EF7 <<< \\u1EF6"
438 " << \\u1EF9 <<< \\u1EF8"
439 " << \\u00FD <<< \\u00DD"
440 " << \\u1EF5 <<< \\u1EF4";
441
442 /* German Phonebook */
443 static const char de_pb_cldr_30[] =
444 "&AE << \\u00E4 <<< \\u00C4 "
445 "&OE << \\u00F6 <<< \\u00D6 "
446 "&UE << \\u00FC <<< \\u00DC ";
447
448 /* Icelandic */
449 static const char is_cldr_30[] =
450 "&[before 1]b < \\u00E1 <<< \\u00C1 "
451 "& d << \\u0111 <<< \\u0110 < \\u00F0 <<< \\u00D0 "
452 "&[before 1]f < \\u00E9 <<< \\u00C9 "
453 "&[before 1]j < \\u00ED <<< \\u00CD "
454 "&[before 1]p < \\u00F3 <<< \\u00D3 "
455 "&[before 1]v < \\u00FA <<< \\u00DA "
456 "&[before 1]z < \\u00FD <<< \\u00DD "
457 "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
458 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
459 "< \\u00E5 <<< \\u00C5";
460
461 /* Latvian */
462 static const char lv_cldr_30[] =
463 "&[before 1]D < \\u010D <<< \\u010C "
464 "&[before 1]H < \\u0123 <<< \\u0122 "
465 "& I << y <<< Y "
466 "&[before 1]L < \\u0137 <<< \\u0136 "
467 "&[before 1]M < \\u013C <<< \\u013B "
468 "&[before 1]O < \\u0146 <<< \\u0145 "
469 "&[before 1]S < \\u0157 <<< \\u0156 "
470 "&[before 1]T < \\u0161 <<< \\u0160 "
471 "&[before 1]\\u01B7 < \\u017E <<< \\u017D";
472
473 /* Romanian */
474 static const char ro_cldr_30[] =
475 "&A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
476 "&I < \\u00EE <<< \\u00CE "
477 "&S < \\u015F = \\u0219 <<< \\u015E = \\u0218 "
478 "&T < \\u0163 = \\u021B <<< \\u0162 = \\u021A";
479
480 /* Slovenian */
481 static const char sl_cldr_30[] =
482 "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 "
483 "&D < \\u0111 <<< \\u0110 "
484 "&S < \\u0161 <<< \\u0160 "
485 "&Z < \\u017E <<< \\u017D";
486
487 /* Polish */
488 static const char pl_cldr_30[] =
489 "&A < \\u0105 <<< \\u0104 "
490 "&C < \\u0107 <<< \\u0106 "
491 "&E < \\u0119 <<< \\u0118 "
492 "&L < \\u0142 <<< \\u0141 "
493 "&N < \\u0144 <<< \\u0143 "
494 "&O < \\u00F3 <<< \\u00D3 "
495 "&S < \\u015B <<< \\u015A "
496 "&Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B";
497
498 /* Estonian */
499 static const char et_cldr_30[] =
500 "&[before 1]T < \\u0161 <<< \\u0160 < z <<< Z "
501 "< \\u017E <<< \\u017D "
502 "&[before 1]X < \\u00F5 <<< \\u00D5 < \\u00E4 <<< \\u00C4 "
503 "< \\u00F6 <<< \\u00D6 < \\u00FC <<< \\u00DC";
504
505 /* Swedish */
506 static const char sv_cldr_30[] =
507 "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 "
508 "& t <<< \\u00FE/h "
509 "& T <<< \\u00DE/H "
510 "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 "
511 "&[before 1]\\u01C0 < \\u00E5 <<< \\u00C5 < \\u00E4 <<< \\u00C4 "
512 "<< \\u00E6 <<< \\u00C6 << \\u0119 <<< \\u0118 "
513 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
514 "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 "
515 "<< \\u00F4 <<< \\u00D4";
516
517 /* Turkish */
518 static const char tr_cldr_30[] =
519 "& C < \\u00E7 <<< \\u00C7 "
520 "& G < \\u011F <<< \\u011E "
521 "&[before 1]i < \\u0131 <<< I "
522 "& i <<< \\u0130 "
523 "& O < \\u00F6 <<< \\u00D6 "
524 "& S < \\u015F <<< \\u015E "
525 "& U < \\u00FC <<< \\u00DC ";
526
527 /* Czech */
528 static const char cs_cldr_30[] =
529 "&C < \\u010D <<< \\u010C "
530 "&H < ch <<< cH <<< Ch <<< CH "
531 "&R < \\u0159 <<< \\u0158"
532 "&S < \\u0161 <<< \\u0160"
533 "&Z < \\u017E <<< \\u017D";
534
535 /* Danish, same for Norwegian */
536 static const char da_cldr_30[] =
537 "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 "
538 "& t <<< \\u00FE/h "
539 "& T <<< \\u00DE/H "
540 "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 "
541 "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
542 "< \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 "
543 "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 "
544 "< \\u00E5 <<< \\u00C5 <<< aa <<< Aa "
545 "<<< AA";
546
547 static Coll_param da_coll_param = {nullptr, false, CASE_FIRST_UPPER};
548
549 /* CASE FIRST OFF for Norwegian */
550 static Coll_param no_coll_param = {nullptr, false, CASE_FIRST_OFF};
551
552 /* Lithuanian */
553 static const char lt_cldr_30[] =
554 "&\\u0300 = \\u0307\\u0300 "
555 "&\\u0301 = \\u0307\\u0301 "
556 "&\\u0303 = \\u0307\\u0303 "
557 "&A << \\u0105 <<< \\u0104 "
558 "&C < \\u010D <<< \\u010C "
559 "&E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116"
560 "&I << \\u012F <<< \\u012E << y <<< Y "
561 "&S < \\u0161 <<< \\u0160 "
562 "&U << \\u0173 <<< \\u0172 << \\u016B <<< \\u016A "
563 "&Z < \\u017E <<< \\u017D";
564
565 /* Slovak */
566 static const char sk_cldr_30[] =
567 "&A < \\u00E4 <<< \\u00C4 "
568 "&C < \\u010D <<< \\u010C "
569 "&H < ch <<< cH <<< Ch <<< CH "
570 "&O < \\u00F4 <<< \\u00D4 "
571 "&R < \\u0159 <<< \\u0158 "
572 "&S < \\u0161 <<< \\u0160 "
573 "&Z < \\u017E <<< \\u017D";
574
575 /* Spanish (Traditional) */
576 static const char es_trad_cldr_30[] =
577 "&N < \\u00F1 <<< \\u00D1 "
578 "&C < ch <<< Ch <<< CH "
579 "&l < ll <<< Ll <<< LL";
580
581 /* Persian */
582 #if 0
583 static const char fa_cldr_30[]=
584 "& \\u064E << \\u0650 << \\u064F << \\u064B << \\u064D "
585 "<< \\u064C "
586 "&[before 1]\\u0627 < \\u0622 "
587 "& \\u0627 << \\u0671 < \\u0621 << \\u0623 << \\u0672 "
588 "<< \\u0625 << \\u0673 << \\u0624 << \\u06CC\\u0654 "
589 "<<< \\u0649\\u0654 <<< \\u0626 "
590 "& \\u06A9 << \\u06AA << \\u06AB << \\u0643 << \\u06AC "
591 "<< \\u06AD << \\u06AE "
592 "& \\u06CF < \\u0647 << \\u06D5 << \\u06C1 << \\u0629 "
593 "<< \\u06C3 << \\u06C0 << \\u06BE "
594 "& \\u06CC << \\u0649 << \\u06D2 << \\u064A << \\u06D0 "
595 "<< \\u06D1 << \\u06CD << \\u06CE";
596
597 static Reorder_param fa_reorder_param= {
598 {CHARGRP_ARAB, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0
599 };
600
601 static Coll_param fa_coll_param= {
602 &fa_reorder_param, true
603 };
604 #endif
605
606 /* Hungarian */
607 static const char hu_cldr_30[] =
608 "&C < cs <<< Cs <<< CS "
609 "&D < dz <<< Dz <<< DZ "
610 "&DZ < dzs <<< Dzs <<< DZS "
611 "&G < gy <<< Gy <<< GY "
612 "&L < ly <<< Ly <<< LY "
613 "&N < ny <<< Ny <<< NY "
614 "&S < sz <<< Sz <<< SZ "
615 "&T < ty <<< Ty <<< TY "
616 "&Z < zs <<< Zs <<< ZS "
617 "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150 "
618 "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 "
619 "&cs <<< ccs/cs "
620 "&Cs <<< Ccs/cs "
621 "&CS <<< CCS/CS "
622 "&dz <<< ddz/dz "
623 "&Dz <<< Ddz/dz "
624 "&DZ <<< DDZ/DZ "
625 "&dzs<<< ddzs/dzs "
626 "&Dzs<<< Ddzs/dzs "
627 "&DZS<<< DDZS/DZS "
628 "&gy <<< ggy/gy "
629 "&Gy <<< Ggy/gy "
630 "&GY <<< GGY/GY "
631 "&ly <<< lly/ly "
632 "&Ly <<< Lly/ly "
633 "&LY <<< LLY/LY "
634 "&ny <<< nny/ny "
635 "&Ny <<< Nny/ny "
636 "&NY <<< NNY/NY "
637 "&sz <<< ssz/sz "
638 "&Sz <<< Ssz/sz "
639 "&SZ <<< SSZ/SZ "
640 "&ty <<< tty/ty "
641 "&Ty <<< Tty/ty "
642 "&TY <<< TTY/TY "
643 "&zs <<< zzs/zs "
644 "&Zs <<< Zzs/zs "
645 "&ZS <<< ZZS/ZS";
646
647 /* Croatian, same for Serbian with Latin and Bosnian. */
648 static const char hr_cldr_30[] =
649 "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 "
650 "&D < d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D "
651 "<<< \\u01C4 < \\u0111 <<< \\u0110 "
652 "&L < lj <<< \\u01C9 <<< Lj <<< \\u01C8 <<< LJ "
653 "<<< \\u01C7 "
654 "&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ "
655 "<<< \\u01CA "
656 "&S < \\u0161 <<< \\u0160 "
657 "&Z < \\u017E <<< \\u017D ";
658
659 static Reorder_param hr_reorder_param = {
660 {CHARGRP_LATIN, CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0};
661
662 static Coll_param hr_coll_param = {&hr_reorder_param, false, CASE_FIRST_OFF};
663
664 /* Sinhala */
665 #if 0
666 static const char si_cldr_30[]=
667 "&\\u0D96 < \\u0D82 < \\u0D83 "
668 "&\\u0DA5 < \\u0DA4";
669 #endif
670
671 /* Vietnamese */
672 static const char vi_cldr_30[] =
673 "&\\u0300 << \\u0309 << \\u0303 << \\u0301 << \\u0323 "
674 "&a < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
675 "&d < \\u0111 <<< \\u0110 "
676 "&e < \\u00EA <<< \\u00CA "
677 "&o < \\u00F4 <<< \\u00D4 < \\u01A1 <<< \\u01A0 "
678 "&u < \\u01B0 <<< \\u01AF";
679
680 static Coll_param vi_coll_param = {nullptr, true, CASE_FIRST_OFF};
681
682 static Reorder_param ja_reorder_param = {
683 /*
684 Per CLDR 30, Japanese reorder rule is defined as [Latn Kana Hani],
685 but for Hani characters, their weight is implicit according to UCA,
686 which is different from other character groups. We don't add "Hani"
687 below and will have special handling for them in
688 adjust_japanese_weight() and apply_reorder_param(). Implicit weight
689 has two collation elements. To make strnxfrm() run faster, we give
690 Japanese Han characters tailored weight which has only one collation
691 element. These characters' weight is defined in ja_han_pages.
692 */
693 {CHARGRP_LATIN, CHARGRP_KANA, CHARGRP_NONE},
694 {{{0, 0}, {0, 0}}},
695 0,
696 0};
697
698 static Coll_param ja_coll_param = {&ja_reorder_param, false /*norm_enabled*/,
699 CASE_FIRST_OFF};
700
701 /*
702 The Chinese reorder rule is defined as [Hani]. This means all Han characters'
703 weight should be greater than the core group and smaller than any other
704 character groups.
705 The Han characters are separated into two parts. The CLDR collation
706 definition file, zh.xml, defines 41336 Han characters' order, and all other
707 Han characters have implicit weight.
708 Since the core group characters occupy the weight value from 0x0209 to 0x1C46
709 in DUCET, we decided to set the weight of all Han characters defined in
710 zh.xml to be the value from 0x1C47 to 0xBDBE. The smallest weight value of
711 these Han characters, 0x1C47, being the largest weight value of the core
712 group plus one (0x1C46 + 1), ensures these Han characters sort greater than
713 the core group characters.
714 Also, we set the implicit weight to the Han characters like
715 [BDBF - BDC3, 0020, 0002][XXXX, 0000, 0000].
716 To tailor the weight of characters of Latin, Cyrillic and so on to be bigger
717 than all Han characters, we give these characters weights from 0xBDC4 to
718 0xF620. There are many character groups between the core group and the Han
719 group, so it would be a long list if we put them in the following reorder_grp
720 structure. But since it is a very simple weight shift, we put their calculated
721 weight here and do not calculate it in my_prepare_reorder().
722
723 NOTE: We use the zh.xml file from CLDR v33.1 to implement this Chinese
724 collation, because we found that the file of CLDR v30 is missing some very
725 common Han characters (the Han character 'small', etc).
726 */
727 static Reorder_param zh_reorder_param = {
728 {CHARGRP_NONE}, {{{0x1C47, 0x54A3}, {0xBDC4, 0xF620}}}, 1, 0x54A3};
729
730 static Coll_param zh_coll_param = {&zh_reorder_param, false, CASE_FIRST_OFF};
731
732 /* Russian, same for Bulgarian and Mongolian with Cyrillic letters */
733 static Reorder_param ru_reorder_param = {
734 {CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0};
735
736 static Coll_param ru_coll_param = {&ru_reorder_param, false /*norm_enabled*/,
737 CASE_FIRST_OFF};
738
739 static constexpr uint16 nochar[] = {0, 0};
740
741 /**
742 Unicode Collation Algorithm:
743 Collation element (weight) scanner,
744 for sequential scanning of collation
745 weights from a string.
746
747 Only meant as a base class; instantiate uca_scanner_any or uca_scanner_900
748 instead of this.
749 */
750 class my_uca_scanner {
751 protected:
752 10416379100 my_uca_scanner(const CHARSET_INFO *cs_arg, const uchar *str, size_t length)
753 10416379100 : wbeg(nochar),
754 10416379100 sbeg(str),
755 10416379100 send(str + length),
756 10416379100 uca(cs_arg->uca),
757 10416379100 cs(cs_arg),
758 10416379100 sbeg_dup(str) {}
759
760 public:
761 /**
762 Get the level the scanner is currently working on. The string
763 can be scanned multiple times (if the collation requires multi-level
764 comparisons, e.g. for accent or case sensitivity); first to get
765 primary weights, then from the start again for secondary, etc.
766 */
767 31759312370 uint get_weight_level() const { return weight_lv; }
768
769 protected:
770 uint weight_lv{0}; /* 0 = Primary, 1 = Secondary, 2 = Tertiary */
771 const uint16 *wbeg; /* Beginning of the current weight string */
772 uint wbeg_stride{0}; /* Number of bytes between weights in string */
773 const uchar *sbeg; /* Beginning of the input string */
774 const uchar *send; /* End of the input string */
775 const MY_UCA_INFO *uca;
776 uint16 implicit[10];
777 my_wc_t prev_char{0}; // Previous code point we scanned, if any.
778 const CHARSET_INFO *cs;
779 uint num_of_ce_left{0};
780 const uchar *sbeg_dup; /* Backup of beginning of input string */
781
782 protected:
783 const uint16 *contraction_find(my_wc_t wc0, size_t *chars_skipped);
784 inline const uint16 *previous_context_find(my_wc_t wc0, my_wc_t wc1);
785 };
786
787 /*
788 Charset dependent scanner part, to optimize
789 some character sets.
790 */
791
792 template <class Mb_wc>
793 struct uca_scanner_any : public my_uca_scanner {
794 170671774 uca_scanner_any(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg,
795 const uchar *str, size_t length)
796 170671774 : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) {
797 // UCA 9.0.0 uses a different table format from what this scanner expects.
798
2/4
✓ Branch 0 taken 85335853 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 85335853 times.
170671654 assert(cs_arg->uca == nullptr || cs_arg->uca->version != UCA_V900);
799 170671654 }
800
801 339039884 uint get_char_index() const { return char_index; }
802
803 inline int next();
804
805 private:
806 /**
807 How many code points (possibly multibyte) we have scanned so far.
808 This includes code points with zero weight. Note that this is reset
809 once we get to the end of the string and restart the scanning for
810 the next weight level, but it is _not_ reset when we reach the
811 end of the last level.
812 */
813 uint char_index{0};
814
815 const Mb_wc mb_wc;
816
817 inline int next_implicit(my_wc_t ch);
818 };
819
820 template <class Mb_wc, int LEVELS_FOR_COMPARE>
821 class uca_scanner_900 : public my_uca_scanner {
822 public:
823 20662068906 uca_scanner_900(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg,
824 const uchar *str, size_t length)
825 20662068906 : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) {}
826
827 inline int next();
828
829 /**
830 For each weight in sequence, call "func", which should have
831 a function signature of "bool func(int weight, bool is_level_separator)".
832 Stops the iteration early if "func" returns false.
833
834 This is morally equivalent to
835
836 int weight;
837 while ((weight= next()) >= 0)
838 {
839 if (!func(weight, weight == 0)) break;
840 }
841
842 except that it might employ optimizations internally to speed up
843 the process. These optimizations will not modify the number of calls
844 to func() (or their order), but might affect the internal scanner
845 state during the calls, so func() should not try to read from
846 the scanner except by calling public member functions.
847
848 As a special optimization, if "bool preaccept_data(int num_weights)"
849 returns true, the next "num_weights" calls to func() _must_ return
850 true. This is so that bounds checking costs can be amortized
851 over fewer calls.
852 */
853 template <class T, class U>
854 inline void for_each_weight(T func, U preaccept_data);
855
856 private:
857 const Mb_wc mb_wc;
858
859 inline int next_raw();
860 inline int more_weight();
861 uint16 apply_case_first(uint16 weight);
862 uint16 apply_reorder_param(uint16 weight);
863 inline int next_implicit(my_wc_t ch);
864 void my_put_jamo_weights(my_wc_t *hangul_jamo, int jamo_cnt);
865 /*
866 apply_reorder_param() needs to return two weights for each origin
867 weight. This boolean signals whether we have already returned the
868 FB86 weight, and are ready to return the origin weight.
869 */
870 bool return_origin_weight{true};
871 /*
872 For Japanese kana-sensitive collation, we only add quaternary
873 weight for katakana and hiragana, but not for others like latin
874 and kanji, because characters like latin and kanji can be already
875 distinguished from kana by three levels of weight.
876 has_quaternary_weight is to indicate whether quaternary weight is
877 needed for characters in string.
878 */
879 bool has_quaternary_weight{false};
880 int handle_ja_contraction_quat_wt();
881 int handle_ja_common_quat_wt(my_wc_t wc);
882 };
883
884 /********** Helper functions to handle contraction ************/
885
886 /**
887 Mark a code point as a contraction part
888
889 @param flags Pointer to UCA contraction flag data
890 @param wc Unicode code point
891 @param flag flag: "is contraction head", "is contraction tail"
892 */
893
894 497850 static inline void my_uca_add_contraction_flag(char *flags, my_wc_t wc,
895 int flag) {
896 497850 flags[wc & MY_UCA_CNT_FLAG_MASK] |= flag;
897 497850 }
898
899 /**
900 Check if UCA level data has contractions.
901
902 @param uca Pointer to UCA data
903
904 @return Whether the UCA data has contractions
905 @retval false - no contractions
906 @retval true - there are some contractions
907 */
908
909 31606605764 static inline bool my_uca_have_contractions(const MY_UCA_INFO *uca) {
910 31606605764 return uca->have_contractions;
911 }
912
913 struct trie_node_cmp {
914 2453235 bool operator()(const MY_CONTRACTION &a, const my_wc_t b) { return a.ch < b; }
915 bool operator()(const MY_CONTRACTION &a, const MY_CONTRACTION &b) {
916 return a.ch < b.ch;
917 }
918 };
919
920 static std::vector<MY_CONTRACTION>::const_iterator
921 328047 find_contraction_part_in_trie(const std::vector<MY_CONTRACTION> &cont_nodes,
922 my_wc_t ch) {
923
2/2
✓ Branch 0 taken 18939 times.
✓ Branch 1 taken 309108 times.
328047 if (cont_nodes.empty()) return cont_nodes.end();
924 309108 return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch,
925 309108 trie_node_cmp());
926 }
927
928 498832 static std::vector<MY_CONTRACTION>::iterator find_contraction_part_in_trie(
929 std::vector<MY_CONTRACTION> &cont_nodes, my_wc_t ch) {
930
2/2
✓ Branch 0 taken 47465 times.
✓ Branch 1 taken 451367 times.
498832 if (cont_nodes.empty()) return cont_nodes.end();
931 451367 return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch,
932 451367 trie_node_cmp());
933 }
934 /**
935 Find a contraction consisting of two code points and return its weight array
936
937 @param cont_nodes Vector that contains contraction nodes
938 @param wc1 First code point
939 @param wc2 Second code point
940
941 @return Weight array
942 @retval NULL - no contraction found
943 @retval ptr - contraction weight array
944 */
945
946 199 const uint16 *my_uca_contraction2_weight(
947 const std::vector<MY_CONTRACTION> *cont_nodes, my_wc_t wc1, my_wc_t wc2) {
948
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 199 times.
199 if (!cont_nodes) return nullptr;
949
950
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 if (!cont_nodes->empty()) {
951 std::vector<MY_CONTRACTION>::const_iterator node_it1 =
952
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 find_contraction_part_in_trie(*cont_nodes, wc1);
953
3/6
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 199 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 199 times.
398 if (node_it1 == cont_nodes->end() || node_it1->ch != wc1) return nullptr;
954 std::vector<MY_CONTRACTION>::const_iterator node_it2 =
955
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 find_contraction_part_in_trie(node_it1->child_nodes, wc2);
956
3/6
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 199 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 199 times.
✗ Branch 5 not taken.
398 if (node_it2 != node_it1->child_nodes.end() && node_it2->ch == wc2 &&
957
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 node_it2->is_contraction_tail)
958 199 return node_it2->weight;
959 }
960 return nullptr;
961 }
962
963 /**
964 Check if a code point can be previous context head
965
966 @param flags Pointer to UCA contraction flag data
967 @param wc Code point
968
969 @retval false - cannot be previous context head
970 @retval true - can be previous context head
971 */
972
973 9230 static inline bool my_uca_can_be_previous_context_head(const char *flags,
974 my_wc_t wc) {
975 9230 return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
976 }
977
978 /**
979 Check if a code point can be previous context tail
980
981 @param flags Pointer to UCA contraction flag data
982 @param wc Code point
983
984 @retval false - cannot be previous context tail
985 @retval true - can be previous context tail
986 */
987
988 61085799 static inline bool my_uca_can_be_previous_context_tail(const char *flags,
989 my_wc_t wc) {
990 61085799 return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
991 }
992
993 /**
994 Check if a string is a contraction of exactly the given length,
995 and return its weight array on success.
996
997 @param cont_nodes Vector that contains contraction nodes
998 @param wc Pointer to wide string
999 @param len String length
1000
1001 @return Weight array
1002 @retval NULL - Input string is not a known contraction
1003 @retval ptr - contraction weight array
1004 */
1005
1006 344802 static inline const uint16 *my_uca_contraction_weight(
1007 const std::vector<MY_CONTRACTION> *cont_nodes, const my_wc_t *wc,
1008 size_t len) {
1009
2/2
✓ Branch 0 taken 165537 times.
✓ Branch 1 taken 179265 times.
344802 if (!cont_nodes) return nullptr;
1010
1011 179265 std::vector<MY_CONTRACTION>::const_iterator node_it;
1012
2/2
✓ Branch 0 taken 246851 times.
✓ Branch 1 taken 19194 times.
266045 for (size_t ch_ind = 0; ch_ind < len; ++ch_ind) {
1013
1/2
✓ Branch 0 taken 246851 times.
✗ Branch 1 not taken.
246851 node_it = find_contraction_part_in_trie(*cont_nodes, wc[ch_ind]);
1014
6/6
✓ Branch 0 taken 183195 times.
✓ Branch 1 taken 63656 times.
✓ Branch 2 taken 96415 times.
✓ Branch 3 taken 86780 times.
✓ Branch 4 taken 160071 times.
✓ Branch 5 taken 86780 times.
246851 if (node_it == cont_nodes->end() || node_it->ch != wc[ch_ind])
1015 160071 return nullptr;
1016 86780 cont_nodes = &node_it->child_nodes;
1017 }
1018
2/2
✓ Branch 0 taken 18318 times.
✓ Branch 1 taken 876 times.
19194 if (node_it->is_contraction_tail) return node_it->weight;
1019 876 return nullptr;
1020 }
1021
1022 /**
1023 Return length of a 0-terminated wide string, analogous to strnlen().
1024
1025 @param s Pointer to wide string
1026 @param maxlen Maximum string length
1027
1028 @return string length, or maxlen if no '\0' is met.
1029 */
1030 2576421 static size_t my_wstrnlen(my_wc_t *s, size_t maxlen) {
1031
2/2
✓ Branch 0 taken 5964453 times.
✓ Branch 1 taken 994 times.
5965447 for (size_t i = 0; i < maxlen; i++) {
1032
2/2
✓ Branch 0 taken 2575427 times.
✓ Branch 1 taken 3389026 times.
5964453 if (s[i] == 0) return i;
1033 }
1034 994 return maxlen;
1035 }
1036
1037 /**
1038 Find a contraction in the input stream and return its weight array
1039
1040 Scan input code points to find a longest path in contraction trie
1041 which contains all these code points. If the ending node of this
1042 path is end of contraction, return the weight array.
1043
1044 @param wc0 The first code point of the contraction (which should have
1045 the MY_UCA_CNT_HEAD flag).
1046 @param[out] chars_skipped How many code points were skipped in the
1047 contraction we found. Only makes sense if we actually found one.
1048
1049 @return Weight array
1050 @retval NULL no contraction found
1051 @retval ptr contraction weight array
1052 */
1053
1054 70983 const uint16 *my_uca_scanner::contraction_find(my_wc_t wc0,
1055 size_t *chars_skipped) {
1056 70983 const uchar *beg = nullptr;
1057 70983 auto mb_wc = cs->cset->mb_wc;
1058
1059 70983 const uchar *s = sbeg;
1060 70983 const std::vector<MY_CONTRACTION> *cont_nodes = uca->contraction_nodes;
1061 70983 const MY_CONTRACTION *longest_contraction = nullptr;
1062 70983 std::vector<MY_CONTRACTION>::const_iterator node_it;
1063 for (;;) {
1064
1/2
✓ Branch 0 taken 79816 times.
✗ Branch 1 not taken.
79816 node_it = find_contraction_part_in_trie(*cont_nodes, wc0);
1065
6/6
✓ Branch 0 taken 15885 times.
✓ Branch 1 taken 63931 times.
✓ Branch 2 taken 2599 times.
✓ Branch 3 taken 13286 times.
✓ Branch 4 taken 66530 times.
✓ Branch 5 taken 13286 times.
79816 if (node_it == cont_nodes->end() || node_it->ch != wc0) break;
1066
2/2
✓ Branch 0 taken 3290 times.
✓ Branch 1 taken 9996 times.
13286 if (node_it->is_contraction_tail) {
1067 3290 longest_contraction = &(*node_it);
1068 3290 beg = s;
1069 3290 *chars_skipped = node_it->contraction_len - 1;
1070 }
1071 int mblen;
1072
3/4
✓ Branch 0 taken 13286 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4453 times.
✓ Branch 3 taken 8833 times.
13286 if ((mblen = mb_wc(cs, &wc0, s, send)) <= 0) break;
1073 8833 s += mblen;
1074 8833 cont_nodes = &node_it->child_nodes;
1075 8833 }
1076
1077
2/2
✓ Branch 0 taken 3065 times.
✓ Branch 1 taken 67918 times.
70983 if (longest_contraction != nullptr) {
1078 3065 const uint16 *cweight = longest_contraction->weight;
1079
2/2
✓ Branch 0 taken 544 times.
✓ Branch 1 taken 2521 times.
3065 if (uca->version == UCA_V900) {
1080 544 cweight += weight_lv;
1081 544 wbeg = cweight + MY_UCA_900_CE_SIZE;
1082 544 wbeg_stride = MY_UCA_900_CE_SIZE;
1083 544 num_of_ce_left = 7;
1084 } else {
1085 2521 wbeg = cweight + 1;
1086 2521 wbeg_stride = MY_UCA_900_CE_SIZE;
1087 }
1088 3065 sbeg = beg;
1089 3065 return cweight;
1090 }
1091 67918 return nullptr; /* No contractions were found */
1092 }
1093
1094 /**
1095 Find weight for contraction with previous context
1096 and return its weight array.
1097
1098 @param wc0 Previous code point
1099 @param wc1 Current code point
1100
1101 @return Weight array
1102 @retval NULL - no contraction with context found
1103 @retval ptr - contraction weight array
1104 */
1105 ALWAYS_INLINE
1106 const uint16 *my_uca_scanner::previous_context_find(my_wc_t wc0, my_wc_t wc1) {
1107 std::vector<MY_CONTRACTION>::const_iterator node_it1 =
1108 982 find_contraction_part_in_trie(*uca->contraction_nodes, wc1);
1109
18/444
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 559 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✓ Branch 51 taken 559 times.
✗ Branch 52 not taken.
✓ Branch 53 taken 559 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 213 times.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 213 times.
✗ Branch 64 not taken.
✓ Branch 65 taken 213 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✓ Branch 144 taken 56 times.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✓ Branch 147 taken 56 times.
✗ Branch 148 not taken.
✓ Branch 149 taken 56 times.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✓ Branch 264 taken 57 times.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✓ Branch 267 taken 57 times.
✗ Branch 268 not taken.
✓ Branch 269 taken 57 times.
✓ Branch 270 taken 57 times.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✓ Branch 273 taken 57 times.
✗ Branch 274 not taken.
✓ Branch 275 taken 57 times.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✓ Branch 336 taken 40 times.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✓ Branch 339 taken 40 times.
✗ Branch 340 not taken.
✓ Branch 341 taken 40 times.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
982 if (node_it1 == uca->contraction_nodes->end() || node_it1->ch != wc1)
1110 return nullptr;
1111 std::vector<MY_CONTRACTION>::const_iterator node_it2 =
1112
6/148
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 559 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 213 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 40 times.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
982 find_contraction_part_in_trie(node_it1->child_nodes_context, wc0);
1113
18/444
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 559 times.
✗ Branch 49 not taken.
✓ Branch 50 taken 559 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 559 times.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 213 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 213 times.
✗ Branch 63 not taken.
✓ Branch 64 taken 213 times.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✓ Branch 144 taken 56 times.
✗ Branch 145 not taken.
✓ Branch 146 taken 56 times.
✗ Branch 147 not taken.
✓ Branch 148 taken 56 times.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✓ Branch 264 taken 57 times.
✗ Branch 265 not taken.
✓ Branch 266 taken 57 times.
✗ Branch 267 not taken.
✓ Branch 268 taken 57 times.
✗ Branch 269 not taken.
✓ Branch 270 taken 57 times.
✗ Branch 271 not taken.
✓ Branch 272 taken 57 times.
✗ Branch 273 not taken.
✓ Branch 274 taken 57 times.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✓ Branch 336 taken 40 times.
✗ Branch 337 not taken.
✓ Branch 338 taken 40 times.
✗ Branch 339 not taken.
✓ Branch 340 taken 40 times.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
982 if (node_it2 != node_it1->child_nodes_context.end() && node_it2->ch == wc0) {
1114
6/148
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 559 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 213 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✓ Branch 113 taken 40 times.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
982 if (uca->version == UCA_V900) {
1115 942 wbeg = node_it2->weight + MY_UCA_900_CE_SIZE + weight_lv;
1116 942 wbeg_stride = MY_UCA_900_CE_SIZE;
1117 942 num_of_ce_left = 7;
1118 } else {
1119 40 wbeg = node_it2->weight + 1;
1120 40 wbeg_stride = MY_UCA_900_CE_SIZE;
1121 }
1122 982 return node_it2->weight + weight_lv;
1123 }
1124 return nullptr;
1125 }
1126
1127 /****************************************************************/
1128 #define HANGUL_JAMO_MAX_LENGTH 3
1129 /**
1130 Check if a code point is a Hangul syllable. Decompose it into jamos
1131 if it is, and return the number of jamos produced.
1132
1133 @param syllable Hangul syllable to be decomposed
1134 @param[out] jamo Corresponding jamos
1135
1136 @return 0 The code point is not a Hangul syllable
1137 or cannot be decomposed
1138 others The number of jamos returned
1139 */
1140 136057931 static int my_decompose_hangul_syllable(my_wc_t syllable, my_wc_t *jamo) {
1141
4/4
✓ Branch 0 taken 132779168 times.
✓ Branch 1 taken 3278763 times.
✓ Branch 2 taken 131381146 times.
✓ Branch 3 taken 1398022 times.
136057931 if (syllable < 0xAC00 || syllable > 0xD7AF) return 0;
1142 1398022 constexpr uint syllable_base = 0xAC00;
1143 1398022 constexpr uint leadingjamo_base = 0x1100;
1144 1398022 constexpr uint voweljamo_base = 0x1161;
1145 1398022 constexpr uint trailingjamo_base = 0x11A7;
1146 1398022 constexpr uint voweljamo_cnt = 21;
1147 1398022 constexpr uint trailingjamo_cnt = 28;
1148 1398022 const uint syllable_index = syllable - syllable_base;
1149 1398022 const uint v_t_combination = voweljamo_cnt * trailingjamo_cnt;
1150 1398022 const uint leadingjamo_index = syllable_index / v_t_combination;
1151 1398022 const uint voweljamo_index =
1152 1398022 (syllable_index % v_t_combination) / trailingjamo_cnt;
1153 1398022 const uint trailingjamo_index = syllable_index % trailingjamo_cnt;
1154 1398022 jamo[0] = leadingjamo_base + leadingjamo_index;
1155 1398022 jamo[1] = voweljamo_base + voweljamo_index;
1156
2/2
✓ Branch 0 taken 1347978 times.
✓ Branch 1 taken 50044 times.
1398022 jamo[2] = trailingjamo_index ? (trailingjamo_base + trailingjamo_index) : 0;
1157
2/2
✓ Branch 0 taken 1347978 times.
✓ Branch 1 taken 50044 times.
1398022 return trailingjamo_index ? 3 : 2;
1158 }
1159
1160 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1161 2796044 void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::my_put_jamo_weights(
1162 my_wc_t *hangul_jamo, int jamo_cnt) {
1163
2/2
✓ Branch 0 taken 4144022 times.
✓ Branch 1 taken 1398022 times.
11084088 for (int jamoind = 0; jamoind < jamo_cnt; jamoind++) {
1164 8288044 uint16 *implicit_weight = implicit + jamoind * MY_UCA_900_CE_SIZE;
1165 8288044 uint page = hangul_jamo[jamoind] >> 8;
1166 8288044 uint code = hangul_jamo[jamoind] & 0xFF;
1167 8288044 const uint16 *jamo_weight_page = uca->weights[page];
1168 8288044 implicit_weight[0] = UCA900_WEIGHT(jamo_weight_page, 0, code);
1169 8288044 implicit_weight[1] = UCA900_WEIGHT(jamo_weight_page, 1, code);
1170 8288044 implicit_weight[2] = UCA900_WEIGHT(jamo_weight_page, 2, code);
1171 }
1172 2796044 implicit[9] = jamo_cnt;
1173 }
1174
1175 /*
1176 Chinese Han characters are assigned an implicit weight according to the
1177 Unicode Collation Algorithm. But when creating our Chinese collation for
1178 utf8mb4, to implement this language's reorder rule, we give the Han
1179 characters in CLDR zh.xml file weight values from 0x1C47 to 0xBDBE, and let
1180 the other Han characters keep their implicit weight. Per UCA, the smallest
1181 leading primary weight of the implicit weight is 0xFB00, and the largest
1182 primary weight we occupy for the Han characters in zh.xml is 0xBDBE. There is
1183 a huge gap between these two weight values. To use this weight value gap and
1184 let the character groups like Latin, Cyrillic, have a single primary weight as
1185 before reordering, we change the leading primary weight of the implicit weight
1186 as below.
1187 */
1188 4122335 static uint16 change_zh_implicit(uint16 weight) {
1189
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4122335 times.
4122335 assert(weight >= 0xFB00);
1190
6/7
✓ Branch 0 taken 20803 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1740 times.
✓ Branch 3 taken 435 times.
✓ Branch 4 taken 9715 times.
✓ Branch 5 taken 18023 times.
✓ Branch 6 taken 4071619 times.
4122335 switch (weight) {
1191 20803 case 0xFB00:
1192 20803 return 0xF621;
1193 case 0xFB40:
1194 return 0xBDBF;
1195 1740 case 0xFB41:
1196 1740 return 0xBDC0;
1197 435 case 0xFB80:
1198 435 return 0xBDC1;
1199 9715 case 0xFB84:
1200 9715 return 0xBDC2;
1201 18023 case 0xFB85:
1202 18023 return 0xBDC3;
1203 4071619 default:
1204 4071619 return weight + 0xF622 - 0xFBC0;
1205 }
1206 }
1207
1208 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1209 ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next_implicit(
1210 my_wc_t ch) {
1211 my_wc_t hangul_jamo[HANGUL_JAMO_MAX_LENGTH];
1212 int jamo_cnt;
1213
21/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 33024 times.
✓ Branch 17 taken 3125924 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 990720 times.
✓ Branch 21 taken 95375133 times.
✓ Branch 22 taken 33024 times.
✓ Branch 23 taken 3188937 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 22018 times.
✓ Branch 27 taken 2126018 times.
✓ Branch 28 taken 308224 times.
✓ Branch 29 taken 29761536 times.
✓ Branch 30 taken 11008 times.
✓ Branch 31 taken 1063158 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 32 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 3 times.
✓ Branch 95 taken 139 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 119 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✓ Branch 101 taken 138 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 120 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 1 times.
✓ Branch 107 taken 7952 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 10703 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
136057931 if ((jamo_cnt = my_decompose_hangul_syllable(ch, hangul_jamo))) {
1214 1398022 my_put_jamo_weights(hangul_jamo, jamo_cnt);
1215 1398022 num_of_ce_left = jamo_cnt - 1;
1216 1398022 wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv;
1217 1398022 wbeg_stride = MY_UCA_900_CE_SIZE;
1218 1398022 return *(implicit + weight_lv);
1219 }
1220
1221 /*
1222 We give the Chinese collation a different leading primary weight to make
1223 sure there are enough single weight values to be assigned to character
1224 groups like Latin, Cyrillic, etc.
1225 */
1226 uint page;
1227
41/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 3035136 times.
✓ Branch 33 taken 90788 times.
✓ Branch 34 taken 20736 times.
✓ Branch 35 taken 3014400 times.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 90912181 times.
✓ Branch 41 taken 4462952 times.
✓ Branch 42 taken 622147 times.
✓ Branch 43 taken 90290034 times.
✓ Branch 44 taken 3035217 times.
✓ Branch 45 taken 153720 times.
✓ Branch 46 taken 20736 times.
✓ Branch 47 taken 3014481 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 2023478 times.
✓ Branch 53 taken 102540 times.
✓ Branch 54 taken 13824 times.
✓ Branch 55 taken 2009654 times.
✓ Branch 56 taken 28327936 times.
✓ Branch 57 taken 1433600 times.
✓ Branch 58 taken 193536 times.
✓ Branch 59 taken 28134400 times.
✓ Branch 60 taken 1011743 times.
✓ Branch 61 taken 51415 times.
✓ Branch 62 taken 6912 times.
✓ Branch 63 taken 1004831 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 2 times.
✓ Branch 125 taken 30 times.
✗ Branch 126 not taken.
✓ Branch 127 taken 2 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✓ Branch 188 taken 139 times.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✓ Branch 191 taken 139 times.
✓ Branch 192 taken 119 times.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✓ Branch 195 taken 119 times.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✓ Branch 200 taken 138 times.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✓ Branch 203 taken 138 times.
✓ Branch 204 taken 120 times.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✓ Branch 207 taken 120 times.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✓ Branch 212 taken 4251 times.
✓ Branch 213 taken 3701 times.
✗ Branch 214 not taken.
✓ Branch 215 taken 4251 times.
✓ Branch 216 taken 7070 times.
✓ Branch 217 taken 3633 times.
✗ Branch 218 not taken.
✓ Branch 219 taken 7070 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
134659909 if (ch >= 0x17000 && ch <= 0x18AFF) // Tangut character
1228 {
1229 877891 page = 0xFB00;
1230 877891 implicit[3] = (ch - 0x17000) | 0x8000;
1231 } else {
1232 133782018 page = ch >> 15;
1233 133782018 implicit[3] = (ch & 0x7FFF) | 0x8000;
1234
106/560
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 3105188 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 3085824 times.
✓ Branch 83 taken 19364 times.
✓ Branch 84 taken 2945280 times.
✓ Branch 85 taken 140544 times.
✓ Branch 86 taken 2817147 times.
✓ Branch 87 taken 128133 times.
✓ Branch 88 taken 2817024 times.
✓ Branch 89 taken 140667 times.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✓ Branch 100 taken 94752986 times.
✗ Branch 101 not taken.
✓ Branch 102 taken 94196082 times.
✓ Branch 103 taken 556904 times.
✓ Branch 104 taken 88216434 times.
✓ Branch 105 taken 5979648 times.
✓ Branch 106 taken 84500577 times.
✓ Branch 107 taken 3715857 times.
✓ Branch 108 taken 84497010 times.
✓ Branch 109 taken 5983215 times.
✓ Branch 110 taken 3168201 times.
✗ Branch 111 not taken.
✓ Branch 112 taken 3149001 times.
✓ Branch 113 taken 19200 times.
✓ Branch 114 taken 2945337 times.
✓ Branch 115 taken 203664 times.
✓ Branch 116 taken 2817204 times.
✓ Branch 117 taken 128133 times.
✓ Branch 118 taken 2817081 times.
✓ Branch 119 taken 203787 times.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✓ Branch 130 taken 2112194 times.
✗ Branch 131 not taken.
✓ Branch 132 taken 2099394 times.
✓ Branch 133 taken 12800 times.
✓ Branch 134 taken 1963558 times.
✓ Branch 135 taken 135836 times.
✓ Branch 136 taken 1878136 times.
✓ Branch 137 taken 85422 times.
✓ Branch 138 taken 1878054 times.
✓ Branch 139 taken 135918 times.
✓ Branch 140 taken 29568000 times.
✗ Branch 141 not taken.
✓ Branch 142 taken 29388800 times.
✓ Branch 143 taken 179200 times.
✓ Branch 144 taken 27489280 times.
✓ Branch 145 taken 1899520 times.
✓ Branch 146 taken 26293372 times.
✓ Branch 147 taken 1195908 times.
✓ Branch 148 taken 26292224 times.
✓ Branch 149 taken 1900668 times.
✓ Branch 150 taken 1056246 times.
✗ Branch 151 not taken.
✓ Branch 152 taken 1049844 times.
✓ Branch 153 taken 6402 times.
✓ Branch 154 taken 981783 times.
✓ Branch 155 taken 68061 times.
✓ Branch 156 taken 939069 times.
✓ Branch 157 taken 42714 times.
✓ Branch 158 taken 939028 times.
✓ Branch 159 taken 68102 times.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✓ Branch 310 taken 32 times.
✗ Branch 311 not taken.
✓ Branch 312 taken 32 times.
✗ Branch 313 not taken.
✓ Branch 314 taken 2 times.
✓ Branch 315 taken 30 times.
✗ Branch 316 not taken.
✓ Branch 317 taken 2 times.
✗ Branch 318 not taken.
✓ Branch 319 taken 30 times.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
✗ Branch 448 not taken.
✗ Branch 449 not taken.
✗ Branch 450 not taken.
✗ Branch 451 not taken.
✗ Branch 452 not taken.
✗ Branch 453 not taken.
✗ Branch 454 not taken.
✗ Branch 455 not taken.
✗ Branch 456 not taken.
✗ Branch 457 not taken.
✗ Branch 458 not taken.
✗ Branch 459 not taken.
✗ Branch 460 not taken.
✗ Branch 461 not taken.
✗ Branch 462 not taken.
✗ Branch 463 not taken.
✗ Branch 464 not taken.
✗ Branch 465 not taken.
✗ Branch 466 not taken.
✗ Branch 467 not taken.
✗ Branch 468 not taken.
✗ Branch 469 not taken.
✓ Branch 470 taken 139 times.
✗ Branch 471 not taken.
✓ Branch 472 taken 139 times.
✗ Branch 473 not taken.
✓ Branch 474 taken 100 times.
✓ Branch 475 taken 39 times.
✓ Branch 476 taken 100 times.
✗ Branch 477 not taken.
✓ Branch 478 taken 100 times.
✓ Branch 479 taken 39 times.
✓ Branch 480 taken 119 times.
✗ Branch 481 not taken.
✓ Branch 482 taken 119 times.
✗ Branch 483 not taken.
✓ Branch 484 taken 94 times.
✓ Branch 485 taken 25 times.
✓ Branch 486 taken 94 times.
✗ Branch 487 not taken.
✓ Branch 488 taken 94 times.
✓ Branch 489 taken 25 times.
✗ Branch 490 not taken.
✗ Branch 491 not taken.
✗ Branch 492 not taken.
✗ Branch 493 not taken.
✗ Branch 494 not taken.
✗ Branch 495 not taken.
✗ Branch 496 not taken.
✗ Branch 497 not taken.
✗ Branch 498 not taken.
✗ Branch 499 not taken.
✓ Branch 500 taken 138 times.
✗ Branch 501 not taken.
✓ Branch 502 taken 138 times.
✗ Branch 503 not taken.
✓ Branch 504 taken 99 times.
✓ Branch 505 taken 39 times.
✓ Branch 506 taken 99 times.
✗ Branch 507 not taken.
✓ Branch 508 taken 99 times.
✓ Branch 509 taken 39 times.
✓ Branch 510 taken 120 times.
✗ Branch 511 not taken.
✓ Branch 512 taken 120 times.
✗ Branch 513 not taken.
✓ Branch 514 taken 96 times.
✓ Branch 515 taken 24 times.
✓ Branch 516 taken 96 times.
✗ Branch 517 not taken.
✓ Branch 518 taken 96 times.
✓ Branch 519 taken 24 times.
✗ Branch 520 not taken.
✗ Branch 521 not taken.
✗ Branch 522 not taken.
✗ Branch 523 not taken.
✗ Branch 524 not taken.
✗ Branch 525 not taken.
✗ Branch 526 not taken.
✗ Branch 527 not taken.
✗ Branch 528 not taken.
✗ Branch 529 not taken.
✓ Branch 530 taken 7952 times.
✗ Branch 531 not taken.
✓ Branch 532 taken 7707 times.
✓ Branch 533 taken 245 times.
✓ Branch 534 taken 4212 times.
✓ Branch 535 taken 3495 times.
✓ Branch 536 taken 3559 times.
✓ Branch 537 taken 653 times.
✓ Branch 538 taken 3559 times.
✓ Branch 539 taken 3495 times.
✓ Branch 540 taken 10703 times.
✗ Branch 541 not taken.
✓ Branch 542 taken 10510 times.
✓ Branch 543 taken 193 times.
✓ Branch 544 taken 7046 times.
✓ Branch 545 taken 3464 times.
✓ Branch 546 taken 6420 times.
✓ Branch 547 taken 626 times.
✓ Branch 548 taken 6420 times.
✓ Branch 549 taken 3464 times.
✗ Branch 550 not taken.
✗ Branch 551 not taken.
✗ Branch 552 not taken.
✗ Branch 553 not taken.
✗ Branch 554 not taken.
✗ Branch 555 not taken.
✗ Branch 556 not taken.
✗ Branch 557 not taken.
✗ Branch 558 not taken.
✗ Branch 559 not taken.
133782018 if ((ch >= 0x3400 && ch <= 0x4DB5) || (ch >= 0x20000 && ch <= 0x2A6D6) ||
1235
92/448
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✓ Branch 64 taken 2804577 times.
✓ Branch 65 taken 12447 times.
✓ Branch 66 taken 2804544 times.
✓ Branch 67 taken 140700 times.
✓ Branch 68 taken 2803878 times.
✓ Branch 69 taken 666 times.
✓ Branch 70 taken 2803872 times.
✓ Branch 71 taken 140706 times.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 84132816 times.
✓ Branch 81 taken 364194 times.
✓ Branch 82 taken 84131826 times.
✓ Branch 83 taken 5984205 times.
✓ Branch 84 taken 84111936 times.
✓ Branch 85 taken 19890 times.
✓ Branch 86 taken 84111762 times.
✓ Branch 87 taken 5984379 times.
✓ Branch 88 taken 2804634 times.
✓ Branch 89 taken 12447 times.
✓ Branch 90 taken 2804601 times.
✓ Branch 91 taken 203820 times.
✓ Branch 92 taken 2803911 times.
✓ Branch 93 taken 690 times.
✓ Branch 94 taken 2803905 times.
✓ Branch 95 taken 203826 times.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 1869756 times.
✓ Branch 105 taken 8298 times.
✓ Branch 106 taken 1869734 times.
✓ Branch 107 taken 135940 times.
✓ Branch 108 taken 1869274 times.
✓ Branch 109 taken 460 times.
✓ Branch 110 taken 1869270 times.
✓ Branch 111 taken 135944 times.
✓ Branch 112 taken 26176052 times.
✓ Branch 113 taken 116172 times.
✓ Branch 114 taken 26175744 times.
✓ Branch 115 taken 1900976 times.
✓ Branch 116 taken 26169528 times.
✓ Branch 117 taken 6216 times.
✓ Branch 118 taken 26169472 times.
✓ Branch 119 taken 1901032 times.
✓ Branch 120 taken 934879 times.
✓ Branch 121 taken 4149 times.
✓ Branch 122 taken 934868 times.
✓ Branch 123 taken 68113 times.
✓ Branch 124 taken 934638 times.
✓ Branch 125 taken 230 times.
✓ Branch 126 taken 934636 times.
✓ Branch 127 taken 68115 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✓ Branch 251 taken 30 times.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✓ Branch 255 taken 30 times.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✓ Branch 376 taken 100 times.
✗ Branch 377 not taken.
✓ Branch 378 taken 100 times.
✓ Branch 379 taken 39 times.
✓ Branch 380 taken 56 times.
✓ Branch 381 taken 44 times.
✓ Branch 382 taken 56 times.
✓ Branch 383 taken 39 times.
✓ Branch 384 taken 94 times.
✗ Branch 385 not taken.
✓ Branch 386 taken 94 times.
✓ Branch 387 taken 25 times.
✓ Branch 388 taken 61 times.
✓ Branch 389 taken 33 times.
✓ Branch 390 taken 61 times.
✓ Branch 391 taken 25 times.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✓ Branch 400 taken 99 times.
✗ Branch 401 not taken.
✓ Branch 402 taken 99 times.
✓ Branch 403 taken 39 times.
✓ Branch 404 taken 55 times.
✓ Branch 405 taken 44 times.
✓ Branch 406 taken 55 times.
✓ Branch 407 taken 39 times.
✓ Branch 408 taken 96 times.
✗ Branch 409 not taken.
✓ Branch 410 taken 96 times.
✓ Branch 411 taken 24 times.
✓ Branch 412 taken 63 times.
✓ Branch 413 taken 33 times.
✓ Branch 414 taken 63 times.
✓ Branch 415 taken 24 times.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✓ Branch 424 taken 3559 times.
✗ Branch 425 not taken.
✓ Branch 426 taken 3559 times.
✓ Branch 427 taken 3495 times.
✓ Branch 428 taken 3515 times.
✓ Branch 429 taken 44 times.
✓ Branch 430 taken 3515 times.
✓ Branch 431 taken 3495 times.
✓ Branch 432 taken 6420 times.
✗ Branch 433 not taken.
✓ Branch 434 taken 6420 times.
✓ Branch 435 taken 3464 times.
✓ Branch 436 taken 6387 times.
✓ Branch 437 taken 33 times.
✓ Branch 438 taken 6387 times.
✓ Branch 439 taken 3464 times.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
127690262 (ch >= 0x2A700 && ch <= 0x2B734) || (ch >= 0x2B740 && ch <= 0x2B81D) ||
1236
24/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 17286 times.
✓ Branch 17 taken 2786586 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 514350 times.
✓ Branch 21 taken 83597412 times.
✓ Branch 22 taken 17310 times.
✓ Branch 23 taken 2786595 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 11540 times.
✓ Branch 27 taken 1857730 times.
✓ Branch 28 taken 161336 times.
✓ Branch 29 taken 26008136 times.
✓ Branch 30 taken 5770 times.
✓ Branch 31 taken 928866 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 31 times.
✓ Branch 95 taken 25 times.
✓ Branch 96 taken 44 times.
✓ Branch 97 taken 17 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✓ Branch 100 taken 31 times.
✓ Branch 101 taken 24 times.
✓ Branch 102 taken 44 times.
✓ Branch 103 taken 19 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 31 times.
✓ Branch 107 taken 3484 times.
✓ Branch 108 taken 44 times.
✓ Branch 109 taken 6343 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
118703054 (ch >= 0x2B820 && ch <= 0x2CEA1)) {
1237 7365663 page += 0xFB80;
1238
63/448
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✓ Branch 64 taken 2927292 times.
✗ Branch 65 not taken.
✓ Branch 66 taken 2927292 times.
✗ Branch 67 not taken.
✓ Branch 68 taken 2908092 times.
✓ Branch 69 taken 19200 times.
✗ Branch 70 not taken.
✓ Branch 71 taken 2908092 times.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 89581791 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 87821991 times.
✓ Branch 83 taken 1759800 times.
✓ Branch 84 taken 87242463 times.
✓ Branch 85 taken 579528 times.
✗ Branch 86 not taken.
✓ Branch 87 taken 87242463 times.
✓ Branch 88 taken 2990421 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 2927451 times.
✓ Branch 91 taken 62970 times.
✓ Branch 92 taken 2908125 times.
✓ Branch 93 taken 19326 times.
✗ Branch 94 not taken.
✓ Branch 95 taken 2908125 times.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 1993674 times.
✗ Branch 105 not taken.
✓ Branch 106 taken 1951634 times.
✓ Branch 107 taken 42040 times.
✓ Branch 108 taken 1938750 times.
✓ Branch 109 taken 12884 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 1938750 times.
✓ Branch 112 taken 27909168 times.
✗ Branch 113 not taken.
✓ Branch 114 taken 27322568 times.
✓ Branch 115 taken 586600 times.
✓ Branch 116 taken 27142192 times.
✓ Branch 117 taken 180376 times.
✗ Branch 118 not taken.
✓ Branch 119 taken 27142192 times.
✓ Branch 120 taken 996981 times.
✗ Branch 121 not taken.
✓ Branch 122 taken 975818 times.
✓ Branch 123 taken 21163 times.
✓ Branch 124 taken 969376 times.
✓ Branch 125 taken 6442 times.
✗ Branch 126 not taken.
✓ Branch 127 taken 969376 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✓ Branch 248 taken 30 times.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✓ Branch 251 taken 30 times.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✓ Branch 376 taken 64 times.
✗ Branch 377 not taken.
✓ Branch 378 taken 64 times.
✗ Branch 379 not taken.
✓ Branch 380 taken 64 times.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✓ Branch 383 taken 64 times.
✓ Branch 384 taken 42 times.
✗ Branch 385 not taken.
✓ Branch 386 taken 42 times.
✗ Branch 387 not taken.
✓ Branch 388 taken 42 times.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✓ Branch 391 taken 42 times.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✓ Branch 400 taken 63 times.
✗ Branch 401 not taken.
✓ Branch 402 taken 63 times.
✗ Branch 403 not taken.
✓ Branch 404 taken 63 times.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✓ Branch 407 taken 63 times.
✓ Branch 408 taken 43 times.
✗ Branch 409 not taken.
✓ Branch 410 taken 43 times.
✗ Branch 411 not taken.
✓ Branch 412 taken 43 times.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✓ Branch 415 taken 43 times.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✓ Branch 424 taken 6979 times.
✗ Branch 425 not taken.
✓ Branch 426 taken 3523 times.
✓ Branch 427 taken 3456 times.
✓ Branch 428 taken 3523 times.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✓ Branch 431 taken 3523 times.
✓ Branch 432 taken 9807 times.
✗ Branch 433 not taken.
✓ Branch 434 taken 6367 times.
✓ Branch 435 taken 3440 times.
✓ Branch 436 taken 6367 times.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✓ Branch 439 taken 6367 times.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
126416355 } else if ((ch >= 0x4E00 && ch <= 0x9FD5) || (ch >= 0xFA0E && ch <= 0xFA29))
1239 2479499 page += 0xFB40;
1240 else
1241 123936856 page += 0xFBC0;
1242 }
1243
14/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 3125924 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 2964661 times.
✓ Branch 21 taken 92410472 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 3188937 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 2126018 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 29761536 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 1063158 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 32 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 139 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 119 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✓ Branch 101 taken 138 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 120 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✓ Branch 107 taken 7952 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 10703 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
134659909 if (cs->coll_param == &zh_coll_param) {
1244 2964661 page = change_zh_implicit(page);
1245 }
1246 134659909 implicit[0] = page;
1247 134659909 implicit[1] = 0x0020;
1248 134659909 implicit[2] = 0x0002;
1249 // implicit[3] is set above.
1250 134659909 implicit[4] = 0;
1251 134659909 implicit[5] = 0;
1252 134659909 num_of_ce_left = 1;
1253 134659909 wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv;
1254 134659909 wbeg_stride = MY_UCA_900_CE_SIZE;
1255
1256 134659909 return *(implicit + weight_lv);
1257 }
1258
1259 /**
1260 Return implicit UCA weight
1261 Used for code points that do not have assigned UCA weights.
1262
1263 @return The leading implicit weight.
1264 */
1265
1266 template <class Mb_wc>
1267 ALWAYS_INLINE int uca_scanner_any<Mb_wc>::next_implicit(my_wc_t ch) {
1268 10321974 implicit[0] = (ch & 0x7FFF) | 0x8000;
1269 10321974 implicit[1] = 0;
1270 10321974 wbeg = implicit;
1271 10321974 wbeg_stride = MY_UCA_900_CE_SIZE;
1272
1273 10321974 uint page = ch >> 15;
1274
1275
10/36
✓ Branch 0 taken 620800 times.
✓ Branch 1 taken 7183423 times.
✓ Branch 2 taken 153600 times.
✓ Branch 3 taken 2007555 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 372 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 357 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 12800 times.
✓ Branch 17 taken 76815 times.
✓ Branch 18 taken 12800 times.
✓ Branch 19 taken 76812 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
10145334 if (ch >= 0x3400 && ch <= 0x4DB5)
1276 800000 page += 0xFB80;
1277
18/72
✓ Branch 0 taken 7183423 times.
✓ Branch 1 taken 141312 times.
✓ Branch 2 taken 2082844 times.
✓ Branch 3 taken 5100579 times.
✓ Branch 4 taken 2007555 times.
✓ Branch 5 taken 35328 times.
✓ Branch 6 taken 501648 times.
✓ Branch 7 taken 1505907 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 372 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 372 times.
✓ Branch 20 taken 357 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 357 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 76815 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 41804 times.
✓ Branch 35 taken 35011 times.
✓ Branch 36 taken 76812 times.
✗ Branch 37 not taken.
✓ Branch 38 taken 41804 times.
✓ Branch 39 taken 35008 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
9521974 else if (ch >= 0x4E00 && ch <= 0x9FA5)
1278 2668100 page += 0xFB40;
1279 else
1280 6853874 page += 0xFBC0;
1281
1282 10321974 return page;
1283 }
1284
1285 template <class Mb_wc>
1286 ALWAYS_INLINE int uca_scanner_any<Mb_wc>::next() {
1287 /*
1288 Check if the weights for the previous code point have been
1289 already fully scanned. If yes, then get the next code point and
1290 initialize wbeg and wlength to its weight string.
1291 */
1292
1293
16/28
✓ Branch 0 taken 8537332 times.
✓ Branch 1 taken 116150513 times.
✓ Branch 2 taken 2334632 times.
✓ Branch 3 taken 53589110 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 75531 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 15926 times.
✓ Branch 8 taken 4312 times.
✓ Branch 9 taken 7238748 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 368 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1428 times.
✓ Branch 14 taken 101974 times.
✓ Branch 15 taken 290282 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 8 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 20 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 98 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 84 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
188340366 if (wbeg[0]) /* More weights left from the previous step: */
1294 11083358 return *wbeg++; /* return the next weight from expansion */
1295
1296 do {
1297 184985964 my_wc_t wc = 0;
1298
1299 /* Get next code point */
1300
23/36
✓ Branch 0 taken 116225554 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 26750554 times.
✓ Branch 3 taken 26850401 times.
✓ Branch 4 taken 75531 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 600 times.
✓ Branch 7 taken 15326 times.
✓ Branch 8 taken 7240409 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 7236364 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 370 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1428 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 141296 times.
✓ Branch 17 taken 150316 times.
✓ Branch 18 taken 136934 times.
✓ Branch 19 taken 155131 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 4 times.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 100 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 98 times.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✓ Branch 31 taken 54 times.
✓ Branch 32 taken 30 times.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
184985964 int mblen = mb_wc(&wc, sbeg, send);
1301
31/36
✓ Branch 0 taken 58156873 times.
✓ Branch 1 taken 58068681 times.
✓ Branch 2 taken 26898010 times.
✓ Branch 3 taken 26702945 times.
✓ Branch 4 taken 2948 times.
✓ Branch 5 taken 72583 times.
✓ Branch 6 taken 600 times.
✓ Branch 7 taken 15326 times.
✓ Branch 8 taken 112808 times.
✓ Branch 9 taken 7127601 times.
✓ Branch 10 taken 112423 times.
✓ Branch 11 taken 7123941 times.
✓ Branch 12 taken 232 times.
✓ Branch 13 taken 138 times.
✓ Branch 14 taken 418 times.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 141296 times.
✓ Branch 17 taken 150316 times.
✓ Branch 18 taken 136934 times.
✓ Branch 19 taken 155131 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 4 times.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 19 times.
✓ Branch 25 taken 81 times.
✓ Branch 26 taken 19 times.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✓ Branch 31 taken 54 times.
✓ Branch 32 taken 30 times.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
184980609 if (mblen <= 0) {
1302 85562665 ++weight_lv;
1303
19/36
✓ Branch 0 taken 58009417 times.
✓ Branch 1 taken 147456 times.
✓ Branch 2 taken 26750554 times.
✓ Branch 3 taken 147456 times.
✓ Branch 4 taken 2948 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 600 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 112809 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 111397 times.
✓ Branch 11 taken 1026 times.
✓ Branch 12 taken 232 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 418 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 141296 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 136934 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 5 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 19 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 19 times.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✗ Branch 31 not taken.
✓ Branch 32 taken 30 times.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
168534893 if (sbeg >= send) return -1; /* No more bytes, end of line reached */
1304 /*
1305 There are some more bytes left. Non-positive mb_len means that
1306 we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
1307 */
1308
3/36
✗ Branch 0 not taken.
✓ Branch 1 taken 147456 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 147456 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1026 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
295937 if ((sbeg += cs->mbminlen) > send) {
1309 /* For safety purposes don't go beyond the string range. */
1310 sbeg = send;
1311 }
1312 /*
1313 Treat every complete or incomplete mbminlen unit as a weight which is
1314 greater than weight for any possible normal character.
1315 0xFFFF is greater than any possible weight in the UCA weight table.
1316 */
1317 295937 return 0xFFFF;
1318 }
1319
1320 99417944 sbeg += mblen;
1321 99417944 char_index++;
1322
18/36
✓ Branch 0 taken 48234500 times.
✓ Branch 1 taken 9834181 times.
✓ Branch 2 taken 24117255 times.
✓ Branch 3 taken 2585690 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 72583 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 15326 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 7127601 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7123941 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 138 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 150316 times.
✓ Branch 18 taken 1 times.
✓ Branch 19 taken 155130 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 4 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 81 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
99417944 if (wc > uca->maxchar) {
1323 /* Return 0xFFFD as weight for all characters outside BMP */
1324 72351756 wbeg = nochar;
1325 72351756 wbeg_stride = 0;
1326 72351756 return 0xFFFD;
1327 }
1328
1329
25/36
✓ Branch 0 taken 1639672 times.
✓ Branch 1 taken 8194509 times.
✓ Branch 2 taken 384278 times.
✓ Branch 3 taken 2201412 times.
✓ Branch 4 taken 24576 times.
✓ Branch 5 taken 48006 times.
✓ Branch 6 taken 6185 times.
✓ Branch 7 taken 9141 times.
✓ Branch 8 taken 13944 times.
✓ Branch 9 taken 7116660 times.
✓ Branch 10 taken 15634 times.
✓ Branch 11 taken 7114296 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 138 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 4263 times.
✓ Branch 17 taken 146053 times.
✓ Branch 18 taken 4468 times.
✓ Branch 19 taken 150662 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 4 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 79 times.
✓ Branch 26 taken 2 times.
✓ Branch 27 taken 77 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
27066188 if (my_uca_have_contractions(uca)) {
1330 const uint16 *cweight;
1331 /*
1332 If we have scanned a code point which can have previous context,
1333 and there were some more code point already before,
1334 then verify that {prev_char, wc} together form
1335 a real previous context pair.
1336 Note, we support only 2-character long sequences with previous
1337 context at the moment. CLDR does not have longer sequences.
1338 */
1339 2093024 if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) &&
1340
4/72
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
120 wbeg != nochar && /* if not the very first character */
1341 56 my_uca_can_be_previous_context_head(uca->contraction_flags,
1342
22/72
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 1639608 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 1639632 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 384278 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 384278 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 24576 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 24576 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 6185 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 6185 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 14048 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 14048 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 14854 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 14854 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✓ Branch 33 taken 4263 times.
✗ Branch 34 not taken.
✓ Branch 35 taken 4263 times.
✗ Branch 36 not taken.
✓ Branch 37 taken 4468 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 4468 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2 times.
✗ Branch 50 not taken.
✓ Branch 51 taken 2 times.
✗ Branch 52 not taken.
✓ Branch 53 taken 2 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 2 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
2092412 prev_char) &&
1343
2/72
✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
80 (cweight = previous_context_find(prev_char, wc))) {
1344 40 prev_char = 0; /* Clear for the next character */
1345 40 return *cweight;
1346
18/36
✓ Branch 0 taken 2522 times.
✓ Branch 1 taken 1637110 times.
✓ Branch 2 taken 378 times.
✓ Branch 3 taken 383900 times.
✓ Branch 4 taken 592 times.
✓ Branch 5 taken 23984 times.
✓ Branch 6 taken 148 times.
✓ Branch 7 taken 6037 times.
✓ Branch 8 taken 1428 times.
✓ Branch 9 taken 12620 times.
✓ Branch 10 taken 1160 times.
✓ Branch 11 taken 13694 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 540 times.
✓ Branch 17 taken 3723 times.
✓ Branch 18 taken 421 times.
✓ Branch 19 taken 4047 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 2 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 2 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
2092308 } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) {
1347 /* Check if wc starts a contraction */
1348 size_t chars_skipped;
1349
24/72
✓ Branch 0 taken 2522 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1007 times.
✓ Branch 3 taken 1515 times.
✓ Branch 4 taken 378 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 36 times.
✓ Branch 7 taken 342 times.
✓ Branch 8 taken 592 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 164 times.
✓ Branch 11 taken 428 times.
✓ Branch 12 taken 148 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 41 times.
✓ Branch 15 taken 107 times.
✓ Branch 16 taken 1428 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 510 times.
✓ Branch 19 taken 918 times.
✓ Branch 20 taken 1160 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 583 times.
✓ Branch 23 taken 577 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 540 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 59 times.
✓ Branch 35 taken 481 times.
✓ Branch 36 taken 421 times.
✗ Branch 37 not taken.
✓ Branch 38 taken 121 times.
✓ Branch 39 taken 300 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
7189 if ((cweight = contraction_find(wc, &chars_skipped))) {
1350 2521 char_index += chars_skipped;
1351 2521 return *cweight;
1352 }
1353 }
1354 2089787 prev_char = wc;
1355 }
1356
1357 /* Process single code point */
1358 27071942 uint page = wc >> 8;
1359 27071942 uint code = wc & 0xFF;
1360
1361 /* If weight page for wc does not exist, then calculate algorithmically */
1362 27071942 const uint16 *wpage = uca->weights[page];
1363
29/72
✓ Branch 0 taken 7945535 times.
✓ Branch 1 taken 1887599 times.
✓ Branch 2 taken 7804223 times.
✓ Branch 3 taken 141312 times.
✓ Branch 4 taken 2196483 times.
✓ Branch 5 taken 389171 times.
✓ Branch 6 taken 2161155 times.
✓ Branch 7 taken 35328 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 72418 times.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 15285 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 372 times.
✓ Branch 17 taken 7129826 times.
✓ Branch 18 taken 372 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 357 times.
✓ Branch 21 taken 7128210 times.
✓ Branch 22 taken 357 times.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 138 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✓ Branch 29 taken 1010 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 89615 times.
✓ Branch 33 taken 60642 times.
✓ Branch 34 taken 89615 times.
✗ Branch 35 not taken.
✓ Branch 36 taken 89612 times.
✓ Branch 37 taken 65397 times.
✓ Branch 38 taken 89612 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✓ Branch 41 taken 4 times.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 81 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 79 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✓ Branch 61 taken 54 times.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✓ Branch 65 taken 54 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
37393916 if (!wpage) return next_implicit(wc);
1364
1365 /* Calculate pointer to wc's weight, using page and offset */
1366 16749968 wbeg = wpage + code * uca->lengths[page];
1367 16749968 wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS;
1368
23/36
✓ Branch 0 taken 75041 times.
✓ Branch 1 taken 1812558 times.
✓ Branch 2 taken 11845 times.
✓ Branch 3 taken 377326 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 72419 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 15285 times.
✓ Branch 8 taken 12309 times.
✓ Branch 9 taken 7117517 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7129105 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 136 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 1330 times.
✓ Branch 17 taken 59312 times.
✓ Branch 18 taken 1365 times.
✓ Branch 19 taken 64032 times.
✓ Branch 20 taken 1 times.
✓ Branch 21 taken 3 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 79 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
16749968 } while (!wbeg[0]); /* Skip ignorable code points */
1369
1370 16648969 return *wbeg++;
1371 }
1372
1373 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1374 65378597316 inline int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::more_weight() {
1375 /*
1376 Check if the weights for the previous code point have been
1377 already fully scanned. If no, return the first non-zero
1378 weight.
1379 */
1380
1381
4/4
✓ Branch 0 taken 139614938 times.
✓ Branch 1 taken 32619586691 times.
✓ Branch 2 taken 69902971 times.
✓ Branch 3 taken 69711967 times.
65518403258 while (num_of_ce_left != 0 && *wbeg == 0) {
1382 139805942 wbeg += wbeg_stride;
1383 139805942 --num_of_ce_left;
1384 }
1385
2/2
✓ Branch 0 taken 69711967 times.
✓ Branch 1 taken 32619586691 times.
65378597316 if (num_of_ce_left != 0) {
1386 139423934 uint16 rtn = *wbeg;
1387 139423934 wbeg += wbeg_stride;
1388 139423934 --num_of_ce_left;
1389 139423934 return rtn; /* return the next weight from expansion */
1390 }
1391 65239173382 return -1;
1392 }
1393
1394 1114045 static inline bool is_hiragana_char(my_wc_t wc) {
1395
4/4
✓ Branch 0 taken 1100883 times.
✓ Branch 1 taken 13162 times.
✓ Branch 2 taken 967 times.
✓ Branch 3 taken 1099916 times.
1114045 return wc >= 0x3041 && wc <= 0x3096;
1396 }
1397
1398 1114909 static inline bool is_katakana_char(my_wc_t wc) {
1399
6/6
✓ Branch 0 taken 1100766 times.
✓ Branch 1 taken 14143 times.
✓ Branch 2 taken 1100033 times.
✓ Branch 3 taken 733 times.
✓ Branch 4 taken 1048786 times.
✓ Branch 5 taken 65390 times.
2163695 return (wc >= 0x30A1 && wc <= 0x30FA) || // Full width katakana
1400
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 1048674 times.
2163695 (wc >= 0xFF66 && wc <= 0xFF9D); // Half width katakana
1401 }
1402
1403 1114040 static inline bool is_katakana_iteration(my_wc_t wc) {
1404
4/4
✓ Branch 0 taken 1114036 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 1114034 times.
1114040 return wc == 0x30FD || wc == 0x30FE;
1405 }
1406
1407 1113078 static inline bool is_hiragana_iteration(my_wc_t wc) {
1408
4/4
✓ Branch 0 taken 1113074 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 1113072 times.
1113078 return wc == 0x309D || wc == 0x309E;
1409 }
1410
1411 1113769 static inline bool is_ja_length_mark(my_wc_t wc) { return wc == 0x30FC; }
1412
1413 /**
1414 Return quaternary weight when running for that level.
1415
1416 @retval 0 - Do not return quaternary weight.
1417 @retval others - Quaternary weight for this character.
1418 */
1419 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1420 ALWAYS_INLINE int
1421 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::handle_ja_contraction_quat_wt() {
1422 /*
1423 For Japanese, only weight shift rule and previous context rule is
1424 defined. And in previous context rules, the first character is always
1425 katakana / hiragana, and the second character is always iteration or
1426 length mark. The quaternary weight of iteration / length mark is
1427 same as the first character. So has_quaternary_weight is always true.
1428 For how we return quaternary weight, please refer to the comment in
1429 handle_ja_common_quat_wt().
1430 */
1431
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 80 times.
✓ Branch 5 taken 479 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 14 times.
✓ Branch 13 taken 42 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 45 times.
✓ Branch 24 taken 12 times.
✓ Branch 25 taken 45 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
729 if (weight_lv == 3) {
1432 118 wbeg = nochar;
1433 118 num_of_ce_left = 0;
1434
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 74 times.
✓ Branch 5 taken 6 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 6 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 6 times.
✓ Branch 23 taken 6 times.
✓ Branch 24 taken 6 times.
✓ Branch 25 taken 6 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
118 if (is_katakana_char(prev_char)) {
1435 94 return JA_KATA_QUAT_WEIGHT;
1436
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 6 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 6 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 6 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
24 } else if (is_hiragana_char(prev_char)) {
1437 24 return JA_HIRA_QUAT_WEIGHT;
1438 }
1439 }
1440 611 return 0;
1441 }
1442
1443 /**
1444 Check whether quaternary weight is needed for character with Japanese
1445 kana-sensitive collation. If it is, return quaternary weight when running
1446 for that level.
1447
1448 @retval 0 - Quaternary weight check is done.
1449 @retval -1 - There is no quaternary weight for this character.
1450 @retval others - Quaternary weight for this character.
1451 */
1452 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1453 ALWAYS_INLINE int
1454 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::handle_ja_common_quat_wt(
1455 my_wc_t wc) {
1456 /*
1457 For Japanese kana-sensitive collation, we detect whether quaternary
1458 weight is necessary when scanning for the first level of weight.
1459 If it is, the quaternary weight will be returned for katakana /
1460 hiragana later.
1461 */
1462
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1112629 times.
✓ Branch 5 taken 2458 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 282 times.
✓ Branch 13 taken 39 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 80 times.
✓ Branch 23 taken 54 times.
✓ Branch 24 taken 79 times.
✓ Branch 25 taken 54 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
1115675 if (weight_lv == 0 && !has_quaternary_weight) {
1463
14/56
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 1112458 times.
✓ Branch 9 taken 2 times.
✓ Branch 10 taken 1112261 times.
✓ Branch 11 taken 197 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 266 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 254 times.
✓ Branch 27 taken 12 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✓ Branch 44 taken 59 times.
✗ Branch 45 not taken.
✓ Branch 46 taken 31 times.
✓ Branch 47 taken 28 times.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 1 times.
✓ Branch 50 taken 31 times.
✓ Branch 51 taken 25 times.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
3338751 if (is_katakana_char(wc) || is_katakana_iteration(wc) ||
1464
30/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1112460 times.
✓ Branch 17 taken 169 times.
✓ Branch 18 taken 1112259 times.
✓ Branch 19 taken 2 times.
✓ Branch 20 taken 3 times.
✓ Branch 21 taken 1112256 times.
✓ Branch 22 taken 373 times.
✓ Branch 23 taken 1112256 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 266 times.
✓ Branch 49 taken 16 times.
✓ Branch 50 taken 254 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 250 times.
✓ Branch 54 taken 32 times.
✓ Branch 55 taken 250 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 59 times.
✓ Branch 89 taken 21 times.
✓ Branch 90 taken 30 times.
✓ Branch 91 taken 1 times.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 27 times.
✓ Branch 94 taken 53 times.
✓ Branch 95 taken 27 times.
✓ Branch 96 taken 57 times.
✓ Branch 97 taken 22 times.
✓ Branch 98 taken 31 times.
✗ Branch 99 not taken.
✓ Branch 100 taken 8 times.
✓ Branch 101 taken 23 times.
✓ Branch 102 taken 56 times.
✓ Branch 103 taken 23 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
4451325 is_hiragana_char(wc) || is_hiragana_iteration(wc) ||
1465 1112574 is_ja_length_mark(wc))
1466 514 has_quaternary_weight = true;
1467
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1458 times.
✓ Branch 5 taken 2230605 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 83 times.
✓ Branch 13 taken 681 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 90 times.
✓ Branch 23 taken 279 times.
✓ Branch 24 taken 90 times.
✓ Branch 25 taken 279 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
2233565 } else if (weight_lv == 3) {
1468 1721 wbeg = nochar;
1469 1721 num_of_ce_left = 0;
1470
30/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1043 times.
✓ Branch 17 taken 415 times.
✓ Branch 18 taken 1041 times.
✓ Branch 19 taken 2 times.
✓ Branch 20 taken 3 times.
✓ Branch 21 taken 1038 times.
✓ Branch 22 taken 420 times.
✓ Branch 23 taken 1038 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 43 times.
✓ Branch 49 taken 40 times.
✓ Branch 50 taken 43 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 39 times.
✓ Branch 54 taken 44 times.
✓ Branch 55 taken 39 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✓ Branch 89 taken 33 times.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 54 times.
✓ Branch 94 taken 36 times.
✓ Branch 95 taken 54 times.
✓ Branch 96 taken 55 times.
✓ Branch 97 taken 35 times.
✓ Branch 98 taken 54 times.
✓ Branch 99 taken 1 times.
✓ Branch 100 taken 3 times.
✓ Branch 101 taken 51 times.
✓ Branch 102 taken 39 times.
✓ Branch 103 taken 51 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
2916 if (is_katakana_char(wc) || is_katakana_iteration(wc) ||
1471 1195 is_ja_length_mark(wc)) {
1472 539 return JA_KATA_QUAT_WEIGHT;
1473
22/84
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 470 times.
✓ Branch 13 taken 568 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 468 times.
✓ Branch 16 taken 570 times.
✓ Branch 17 taken 468 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✓ Branch 36 taken 12 times.
✓ Branch 37 taken 27 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 12 times.
✓ Branch 40 taken 27 times.
✓ Branch 41 taken 12 times.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✓ Branch 66 taken 10 times.
✓ Branch 67 taken 44 times.
✓ Branch 68 taken 1 times.
✓ Branch 69 taken 9 times.
✓ Branch 70 taken 45 times.
✓ Branch 71 taken 9 times.
✓ Branch 72 taken 9 times.
✓ Branch 73 taken 42 times.
✗ Branch 74 not taken.
✓ Branch 75 taken 9 times.
✓ Branch 76 taken 42 times.
✓ Branch 77 taken 9 times.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
1182 } else if (is_hiragana_char(wc) || is_hiragana_iteration(wc)) {
1474 684 return JA_HIRA_QUAT_WEIGHT;
1475 }
1476 498 return -1;
1477 }
1478 3344914 return 0;
1479 }
1480
1481 // Generic version that can handle any number of levels.
1482 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1483 ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next_raw() {
1484 16220993452 int remain_weight = more_weight();
1485
36/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1192470 times.
✓ Branch 17 taken 6684643 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 34427865 times.
✓ Branch 21 taken 200280896 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 6687920 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 4460591 times.
✓ Branch 28 taken 30704587 times.
✓ Branch 29 taken 62322106 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 69816793 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 1230 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 36 times.
✓ Branch 53 taken 20661 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 201 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 48 times.
✓ Branch 60 taken 15 times.
✓ Branch 61 taken 6427 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 222184054 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 67 times.
✓ Branch 89 taken 659 times.
✓ Branch 90 taken 67 times.
✓ Branch 91 taken 659 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 288 times.
✓ Branch 95 taken 8419 times.
✓ Branch 96 taken 270 times.
✓ Branch 97 taken 8437 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 32 times.
✓ Branch 100 taken 93 times.
✓ Branch 101 taken 16156 times.
✓ Branch 102 taken 93 times.
✓ Branch 103 taken 16156 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 8215 times.
✓ Branch 107 taken 15882816673 times.
✓ Branch 108 taken 8257 times.
✓ Branch 109 taken 15888173671 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 4116 times.
32409852927 if (remain_weight >= 0) return remain_weight;
1486
1487 do {
1488 32344164019 my_wc_t wc = 0;
1489
1490 /* Get next code point */
1491
44/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3336753 times.
✓ Branch 17 taken 3351395 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 100086405 times.
✓ Branch 21 taken 100285602 times.
✓ Branch 22 taken 3341438 times.
✓ Branch 23 taken 3349552 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 2228835 times.
✓ Branch 27 taken 2233980 times.
✓ Branch 28 taken 31149769 times.
✓ Branch 29 taken 31210613 times.
✓ Branch 30 taken 29503941 times.
✓ Branch 31 taken 40314760 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 140 times.
✓ Branch 49 taken 1102 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 90 times.
✓ Branch 53 taken 20571 times.
✓ Branch 54 taken 75 times.
✓ Branch 55 taken 126 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✓ Branch 58 taken 24 times.
✓ Branch 59 taken 24 times.
✓ Branch 60 taken 28 times.
✓ Branch 61 taken 6399 times.
✓ Branch 62 taken 94180208 times.
✓ Branch 63 taken 128005120 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 162 times.
✓ Branch 89 taken 506 times.
✓ Branch 90 taken 163 times.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 2059 times.
✓ Branch 95 taken 6366 times.
✓ Branch 96 taken 2092 times.
✓ Branch 97 taken 6345 times.
✓ Branch 98 taken 13 times.
✓ Branch 99 taken 19 times.
✓ Branch 100 taken 1823 times.
✓ Branch 101 taken 14334 times.
✓ Branch 102 taken 1832 times.
✓ Branch 103 taken 14324 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 236507897 times.
✓ Branch 107 taken 15647288712 times.
✓ Branch 108 taken 236124217 times.
✓ Branch 109 taken 15651581584 times.
✓ Branch 110 taken 1599 times.
✓ Branch 111 taken 2517 times.
32344164019 int mblen = mb_wc(&wc, sbeg, send);
1492
44/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3342897 times.
✓ Branch 17 taken 3345251 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 100270725 times.
✓ Branch 21 taken 100101282 times.
✓ Branch 22 taken 3347582 times.
✓ Branch 23 taken 3343408 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 2232931 times.
✓ Branch 27 taken 2229884 times.
✓ Branch 28 taken 31207113 times.
✓ Branch 29 taken 31153269 times.
✓ Branch 30 taken 29505989 times.
✓ Branch 31 taken 40312712 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 140 times.
✓ Branch 49 taken 1102 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 90 times.
✓ Branch 53 taken 20571 times.
✓ Branch 54 taken 75 times.
✓ Branch 55 taken 126 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✓ Branch 58 taken 24 times.
✓ Branch 59 taken 24 times.
✓ Branch 60 taken 28 times.
✓ Branch 61 taken 6399 times.
✓ Branch 62 taken 94180053 times.
✓ Branch 63 taken 128005275 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 162 times.
✓ Branch 89 taken 506 times.
✓ Branch 90 taken 163 times.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 2059 times.
✓ Branch 95 taken 6366 times.
✓ Branch 96 taken 2092 times.
✓ Branch 97 taken 6345 times.
✓ Branch 98 taken 13 times.
✓ Branch 99 taken 19 times.
✓ Branch 100 taken 1823 times.
✓ Branch 101 taken 14334 times.
✓ Branch 102 taken 1832 times.
✓ Branch 103 taken 14324 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 236507923 times.
✓ Branch 107 taken 15647288686 times.
✓ Branch 108 taken 236124001 times.
✓ Branch 109 taken 15651581800 times.
✓ Branch 110 taken 1599 times.
✓ Branch 111 taken 2517 times.
32344164019 if (mblen <= 0) {
1493 if (LEVELS_FOR_COMPARE == 1) {
1494 627526706 ++weight_lv;
1495 872789954 return -1;
1496 }
1497
1498
30/84
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 3342631 times.
✓ Branch 13 taken 266 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 66847212 times.
✓ Branch 17 taken 33423513 times.
✓ Branch 18 taken 2231736 times.
✓ Branch 19 taken 1115846 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 1116479 times.
✓ Branch 23 taken 1116452 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✓ Branch 36 taken 108 times.
✓ Branch 37 taken 32 times.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 60 times.
✓ Branch 41 taken 30 times.
✓ Branch 42 taken 50 times.
✓ Branch 43 taken 25 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✓ Branch 46 taken 12 times.
✓ Branch 47 taken 12 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✓ Branch 66 taken 131 times.
✓ Branch 67 taken 31 times.
✓ Branch 68 taken 132 times.
✓ Branch 69 taken 31 times.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✓ Branch 72 taken 1813 times.
✓ Branch 73 taken 246 times.
✓ Branch 74 taken 1847 times.
✓ Branch 75 taken 245 times.
✓ Branch 76 taken 10 times.
✓ Branch 77 taken 3 times.
✓ Branch 78 taken 935 times.
✓ Branch 79 taken 888 times.
✓ Branch 80 taken 944 times.
✓ Branch 81 taken 888 times.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
109202608 if (++weight_lv < LEVELS_FOR_COMPARE) {
1499
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3342631 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 108 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 131 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 132 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
3343002 if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) {
1500 // Return directly if we don't have quaternary weight.
1501
16/56
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 1114181 times.
✓ Branch 9 taken 2228450 times.
✓ Branch 10 taken 1113896 times.
✓ Branch 11 taken 285 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 36 times.
✓ Branch 25 taken 72 times.
✓ Branch 26 taken 4 times.
✓ Branch 27 taken 32 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✓ Branch 44 taken 43 times.
✓ Branch 45 taken 88 times.
✓ Branch 46 taken 3 times.
✓ Branch 47 taken 40 times.
✓ Branch 48 taken 43 times.
✓ Branch 49 taken 89 times.
✓ Branch 50 taken 3 times.
✓ Branch 51 taken 40 times.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
3343002 if (weight_lv == 3 && !has_quaternary_weight) return -1;
1502 }
1503 /*
1504 Restart scanning from the beginning of the string, and add
1505 a level separator.
1506 */
1507 72430194 sbeg = sbeg_dup;
1508 72430194 return 0;
1509 }
1510
1511 // If we don't have any more levels left, we're done.
1512 35658508 return -1;
1513 }
1514
1515 31607434705 sbeg += mblen;
1516
22/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 3345251 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✓ Branch 21 taken 100101282 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 3343408 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 2229884 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 31153269 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 40312712 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 1102 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 20571 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 126 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 24 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 6399 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 128005275 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 506 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 6366 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 6345 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 19 times.
✗ Branch 100 not taken.
✓ Branch 101 taken 14334 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14324 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✓ Branch 107 taken 15647288686 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 15651581800 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
31607434705 assert(wc <= uca->maxchar); // mb_wc() has already checked this.
1517
1518
31/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3345251 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 43385949 times.
✓ Branch 21 taken 56715333 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 3343408 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 2229884 times.
✓ Branch 28 taken 12238127 times.
✓ Branch 29 taken 18915142 times.
✓ Branch 30 taken 12 times.
✓ Branch 31 taken 40312747 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1102 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 8892 times.
✓ Branch 53 taken 11679 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 126 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 24 times.
✓ Branch 60 taken 2506 times.
✓ Branch 61 taken 3893 times.
✓ Branch 62 taken 362 times.
✓ Branch 63 taken 128003682 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 506 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 505 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 121 times.
✓ Branch 95 taken 6245 times.
✓ Branch 96 taken 121 times.
✓ Branch 97 taken 6224 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 19 times.
✗ Branch 100 not taken.
✓ Branch 101 taken 14334 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14324 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 489198 times.
✓ Branch 107 taken 15648356894 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 15654882657 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
31607434705 if (my_uca_have_contractions(uca)) {
1519 const uint16 *cweight;
1520 /*
1521 If we have scanned a code point which can have previous context,
1522 and there were some more code points already before,
1523 then verify that {prev_char, wc} together form
1524 a real previous context pair.
1525 Note, we support only 2-character long sequences with previous
1526 context at the moment. CLDR does not have longer sequences.
1527 CLDR doesn't have previous context rule whose first character is
1528 0x0000, so the initial value (0) of prev_char won't break the logic.
1529 */
1530
12/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 559 times.
✓ Branch 17 taken 4093 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 213 times.
✓ Branch 21 taken 4080 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 16 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✓ Branch 89 taken 16 times.
✓ Branch 90 taken 57 times.
✓ Branch 91 taken 21 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 3 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 3 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
58984144 if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) &&
1531 9174 my_uca_can_be_previous_context_head(uca->contraction_flags,
1532
36/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 4652 times.
✓ Branch 33 taken 3340599 times.
✓ Branch 34 taken 559 times.
✓ Branch 35 taken 3344692 times.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 4293 times.
✓ Branch 41 taken 43381656 times.
✓ Branch 42 taken 213 times.
✓ Branch 43 taken 43385736 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✓ Branch 57 taken 12238127 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 12238127 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✓ Branch 96 taken 72 times.
✓ Branch 97 taken 1030 times.
✓ Branch 98 taken 56 times.
✓ Branch 99 taken 1046 times.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✓ Branch 105 taken 8892 times.
✗ Branch 106 not taken.
✓ Branch 107 taken 8892 times.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✓ Branch 121 taken 2506 times.
✗ Branch 122 not taken.
✓ Branch 123 taken 2506 times.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✓ Branch 176 taken 73 times.
✓ Branch 177 taken 433 times.
✓ Branch 178 taken 57 times.
✓ Branch 179 taken 449 times.
✓ Branch 180 taken 78 times.
✓ Branch 181 taken 427 times.
✓ Branch 182 taken 57 times.
✓ Branch 183 taken 448 times.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✓ Branch 188 taken 3 times.
✓ Branch 189 taken 118 times.
✗ Branch 190 not taken.
✓ Branch 191 taken 121 times.
✓ Branch 192 taken 3 times.
✓ Branch 193 taken 118 times.
✗ Branch 194 not taken.
✓ Branch 195 taken 121 times.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✓ Branch 213 taken 5140 times.
✗ Branch 214 not taken.
✓ Branch 215 taken 5140 times.
✗ Branch 216 not taken.
✓ Branch 217 taken 5231 times.
✗ Branch 218 not taken.
✓ Branch 219 taken 5231 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
59002625 prev_char) &&
1533
10/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 559 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 559 times.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 213 times.
✗ Branch 41 not taken.
✓ Branch 42 taken 213 times.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✓ Branch 96 taken 56 times.
✗ Branch 97 not taken.
✓ Branch 98 taken 56 times.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✓ Branch 176 taken 57 times.
✗ Branch 177 not taken.
✓ Branch 178 taken 57 times.
✗ Branch 179 not taken.
✓ Branch 180 taken 57 times.
✗ Branch 181 not taken.
✓ Branch 182 taken 57 times.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
1884 (cweight = previous_context_find(prev_char, wc))) {
1534 // For Japanese kana-sensitive collation.
1535
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 559 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 56 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 57 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
729 if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) {
1536 729 int quat_wt = handle_ja_contraction_quat_wt();
1537 729 prev_char = 0;
1538
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 80 times.
✓ Branch 5 taken 479 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 14 times.
✓ Branch 13 taken 42 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 45 times.
✓ Branch 24 taken 12 times.
✓ Branch 25 taken 45 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
729 if (quat_wt > 0) return quat_wt;
1539 }
1540 824 prev_char = 0; /* Clear for the next code point */
1541 824 return *cweight;
1542
18/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 3344692 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 48381 times.
✓ Branch 21 taken 43337355 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 13701 times.
✓ Branch 29 taken 12224426 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 1046 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 807 times.
✓ Branch 53 taken 8085 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 239 times.
✓ Branch 61 taken 2267 times.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 449 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 448 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 121 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 121 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 354 times.
✓ Branch 107 taken 4786 times.
✓ Branch 108 taken 312 times.
✓ Branch 109 taken 4919 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
58992509 } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) {
1543 /* Check if wc starts a contraction */
1544 size_t chars_skipped; // Ignored.
1545
18/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 48381 times.
✗ Branch 41 not taken.
✓ Branch 42 taken 162 times.
✓ Branch 43 taken 48219 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✓ Branch 56 taken 13701 times.
✗ Branch 57 not taken.
✓ Branch 58 taken 129 times.
✓ Branch 59 taken 13572 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 807 times.
✗ Branch 105 not taken.
✓ Branch 106 taken 36 times.
✓ Branch 107 taken 771 times.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✓ Branch 120 taken 239 times.
✗ Branch 121 not taken.
✓ Branch 122 taken 12 times.
✓ Branch 123 taken 227 times.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✓ Branch 212 taken 354 times.
✗ Branch 213 not taken.
✓ Branch 214 taken 104 times.
✓ Branch 215 taken 250 times.
✓ Branch 216 taken 312 times.
✗ Branch 217 not taken.
✓ Branch 218 taken 101 times.
✓ Branch 219 taken 211 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
63794 if ((cweight = contraction_find(wc, &chars_skipped))) return *cweight;
1546 }
1547 58991965 prev_char = wc;
1548 }
1549
1550 // For Japanese kana-sensitive collation.
1551
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3344692 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 1046 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 449 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 448 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
3346635 if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) {
1552
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1115087 times.
✓ Branch 5 taken 2229605 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 321 times.
✓ Branch 13 taken 725 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 134 times.
✓ Branch 23 taken 315 times.
✓ Branch 24 taken 133 times.
✓ Branch 25 taken 315 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
3346635 int quat_wt = handle_ja_common_quat_wt(wc);
1553
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 468 times.
✓ Branch 5 taken 3344224 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 1034 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 9 times.
✓ Branch 23 taken 440 times.
✓ Branch 24 taken 9 times.
✓ Branch 25 taken 439 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
3346635 if (quat_wt == -1)
1554 498 continue;
1555
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 990 times.
✓ Branch 5 taken 3343234 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 71 times.
✓ Branch 13 taken 963 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 81 times.
✓ Branch 23 taken 359 times.
✓ Branch 24 taken 81 times.
✓ Branch 25 taken 358 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
3346137 else if (quat_wt)
1556 1223 return quat_wt;
1557 }
1558 /* Process single code point */
1559 31611809376 uint page = wc >> 8;
1560 31611809376 uint code = wc & 0xFF;
1561
1562 /* If weight page for wc does not exist, then calculate algorithmically */
1563 31611809376 const uint16 *wpage = uca->weights[page];
1564
35/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3158948 times.
✓ Branch 17 taken 184286 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 96365853 times.
✓ Branch 21 taken 3735054 times.
✓ Branch 22 taken 3221961 times.
✓ Branch 23 taken 121447 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 2148036 times.
✓ Branch 27 taken 81848 times.
✓ Branch 28 taken 30069760 times.
✓ Branch 29 taken 1083380 times.
✓ Branch 30 taken 1074166 times.
✓ Branch 31 taken 39238581 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 963 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 20535 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 126 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 24 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 6387 times.
✓ Branch 62 taken 32 times.
✓ Branch 63 taken 128003650 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 359 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 358 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 142 times.
✓ Branch 95 taken 6224 times.
✓ Branch 96 taken 119 times.
✓ Branch 97 taken 6226 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 19 times.
✓ Branch 100 taken 138 times.
✓ Branch 101 taken 14196 times.
✓ Branch 102 taken 120 times.
✓ Branch 103 taken 14204 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 7953 times.
✓ Branch 107 taken 15648353977 times.
✓ Branch 108 taken 10703 times.
✓ Branch 109 taken 15654877084 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
31747867307 if (!wpage) return next_implicit(wc);
1565
1566 /* Calculate pointer to wc's weight, using page and offset */
1567 31475751445 wbeg = UCA900_WEIGHT_ADDR(wpage, weight_lv, code);
1568 31475751445 wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS;
1569 185966 num_of_ce_left = UCA900_NUM_OF_CE(wpage, code);
1570
35/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3505 times.
✓ Branch 17 taken 181249 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 91111 times.
✓ Branch 21 taken 3643943 times.
✓ Branch 22 taken 3070 times.
✓ Branch 23 taken 118377 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 2224 times.
✓ Branch 27 taken 79624 times.
✓ Branch 28 taken 38276 times.
✓ Branch 29 taken 1045104 times.
✓ Branch 30 taken 1908 times.
✓ Branch 31 taken 39236673 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 12 times.
✓ Branch 49 taken 963 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 20535 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 126 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 24 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 6387 times.
✓ Branch 62 taken 1274 times.
✓ Branch 63 taken 128002376 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 9 times.
✓ Branch 89 taken 359 times.
✓ Branch 90 taken 9 times.
✓ Branch 91 taken 358 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 6 times.
✓ Branch 95 taken 6218 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 6226 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 19 times.
✓ Branch 100 taken 1 times.
✓ Branch 101 taken 14195 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14204 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 979936 times.
✓ Branch 107 taken 15647374041 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 15655344954 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
31475751943 } while (!wbeg[0]); /* Skip ignorable code points */
1571
1572 31475098472 uint16 rtn = *wbeg;
1573 31475098472 wbeg += wbeg_stride;
1574 31475098472 --num_of_ce_left;
1575 31475098472 return rtn;
1576 }
1577
1578 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1579 template <class T, class U>
1580 ALWAYS_INLINE void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::for_each_weight(
1581 T func, U preaccept_data) {
1582
13/64
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 1115908 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 1115908 times.
✓ Branch 24 taken 1116522 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 1116522 times.
✓ Branch 28 taken 32009877 times.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 32009877 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 25 times.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✓ Branch 55 taken 25 times.
✓ Branch 56 taken 12 times.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 12 times.
✓ Branch 60 taken 94178731 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 210 times.
✓ Branch 63 taken 94178521 times.
128420905 if (cs->tailoring || cs->mbminlen != 1 || cs->coll_param) {
1583 // Slower, generic path.
1584 int s_res;
1585
12/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 6762951 times.
✓ Branch 9 taken 1114162 times.
✓ Branch 10 taken 201285248 times.
✓ Branch 11 taken 33423513 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✓ Branch 14 taken 61819580 times.
✓ Branch 15 taken 31207113 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 1250 times.
✓ Branch 25 taken 36 times.
✓ Branch 26 taken 20667 times.
✓ Branch 27 taken 30 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 6414 times.
✓ Branch 31 taken 28 times.
401385987 while ((s_res = next()) >= 0) {
1586
11/48
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 6762951 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 111 times.
✓ Branch 19 taken 6762840 times.
✓ Branch 20 taken 201285248 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 166 times.
✓ Branch 23 taken 201285082 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 61819580 times.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 61819580 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✓ Branch 41 taken 1250 times.
✗ Branch 42 not taken.
✓ Branch 43 taken 20667 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✓ Branch 47 taken 6414 times.
269896110 if (!func(s_res, s_res == 0)) return;
1587 }
1588 65744882 return;
1589 }
1590
1591 /*
1592 Fast path. TODO: See if we can accept some character sets
1593 with tailorings.
1594 */
1595 128420865 const uint16 *ascii_wpage =
1596 128420865 UCA900_WEIGHT_ADDR(uca->weights[0], /*level=*/weight_lv, /*subcode=*/0);
1597
1598 /*
1599 Precalculate the limit for the fast path below, taking care not to form
1600 pointers that are before sbeg, as those cannot be legally compared.
1601 (In particular, this catches the case of sbeg == send == nullptr.)
1602 */
1603
12/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1048760 times.
✓ Branch 11 taken 67148 times.
✓ Branch 12 taken 1049318 times.
✓ Branch 13 taken 67204 times.
✓ Branch 14 taken 27282065 times.
✓ Branch 15 taken 4727812 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 12 times.
✓ Branch 27 taken 13 times.
✓ Branch 28 taken 4 times.
✓ Branch 29 taken 8 times.
✓ Branch 30 taken 89507816 times.
✓ Branch 31 taken 4670705 times.
128420865 const uchar *send_local = (send - sbeg > 3) ? (send - 3) : sbeg;
1604
1605 174728549 for (;;) {
1606 /*
1607 We could have more weights left from the previous call to next()
1608 (if any) that we need to deal with.
1609 */
1610 int s_res;
1611
11/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1149497 times.
✓ Branch 11 taken 6687920 times.
✓ Branch 12 taken 1123215 times.
✓ Branch 13 taken 4460591 times.
✓ Branch 14 taken 1096841 times.
✓ Branch 15 taken 69817044 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 4 times.
✓ Branch 27 taken 201 times.
✓ Branch 28 taken 2 times.
✓ Branch 29 taken 48 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 222183814 times.
306518991 while ((s_res = more_weight()) >= 0) {
1612
11/48
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 1149497 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 1149497 times.
✓ Branch 24 taken 1123215 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 8 times.
✓ Branch 27 taken 1123207 times.
✓ Branch 28 taken 1096838 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 3 times.
✓ Branch 31 taken 1096835 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 4 times.
✗ Branch 44 not taken.
✓ Branch 45 taken 2 times.
✗ Branch 46 not taken.
✓ Branch 47 taken 32 times.
3369206 if (!func(s_res, s_res == 0)) return;
1613 }
1614
1615 /*
1616 Loop in a simple fast path as long as we only have non-ignorable
1617 ASCII characters. These characters always have exactly a single weight
1618 and consist of only a single byte, so we can skip a lot of the checks
1619 we'd otherwise have to do.
1620 */
1621 303149618 const uchar *sbeg_local = sbeg;
1622
33/96
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 3147504 times.
✓ Branch 31 taken 3541031 times.
✓ Branch 32 taken 3147411 times.
✓ Branch 33 taken 93 times.
✓ Branch 34 taken 3147411 times.
✓ Branch 35 taken 3541124 times.
✓ Branch 36 taken 2099640 times.
✓ Branch 37 taken 2362461 times.
✓ Branch 38 taken 2099517 times.
✓ Branch 39 taken 123 times.
✓ Branch 40 taken 2099517 times.
✓ Branch 41 taken 2362584 times.
✓ Branch 42 taken 284052535 times.
✓ Branch 43 taken 66275647 times.
✓ Branch 44 taken 281568588 times.
✓ Branch 45 taken 2483934 times.
✓ Branch 46 taken 281568588 times.
✓ Branch 47 taken 68759581 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✓ Branch 78 taken 210 times.
✓ Branch 79 taken 174 times.
✓ Branch 80 taken 210 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 210 times.
✓ Branch 83 taken 174 times.
✓ Branch 84 taken 120 times.
✓ Branch 85 taken 42 times.
✓ Branch 86 taken 120 times.
✗ Branch 87 not taken.
✓ Branch 88 taken 120 times.
✓ Branch 89 taken 42 times.
✓ Branch 90 taken 384133859 times.
✓ Branch 91 taken 222183385 times.
✓ Branch 92 taken 384134625 times.
✗ Branch 93 not taken.
✓ Branch 94 taken 384135310 times.
✓ Branch 95 taken 222182569 times.
967796608 while (sbeg_local < send_local && preaccept_data(sizeof(uint32))) {
1623 /*
1624 Check if all four bytes are in the range 0x20..0x7e, inclusive.
1625 These have exactly one weight. Note that this unfortunately does not
1626 include tab and newline, which would otherwise be legal candidates.
1627
1628 See the FastOutOfRange unit test for verification that the bitfiddling
1629 trick used here is correct.
1630 */
1631 uint32 four_bytes;
1632 670951156 memcpy(&four_bytes, sbeg_local, sizeof(four_bytes));
1633
12/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 615 times.
✓ Branch 11 taken 3146796 times.
✓ Branch 12 taken 1510 times.
✓ Branch 13 taken 2098007 times.
✓ Branch 14 taken 280518457 times.
✓ Branch 15 taken 1050131 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 183 times.
✓ Branch 27 taken 27 times.
✓ Branch 28 taken 114 times.
✓ Branch 29 taken 6 times.
✓ Branch 30 taken 384133902 times.
✓ Branch 31 taken 1408 times.
670951156 if (((four_bytes + 0x01010101u) & 0x80808080) ||
1634
7/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 615 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 1510 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 280511138 times.
✓ Branch 15 taken 7319 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 183 times.
✗ Branch 27 not taken.
✓ Branch 28 taken 114 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 384134163 times.
✗ Branch 31 not taken.
664654781 ((four_bytes - 0x20202020u) & 0x80808080))
1635 break;
1636 664647723 const int s_res0 = ascii_wpage[sbeg_local[0]];
1637 664647723 const int s_res1 = ascii_wpage[sbeg_local[1]];
1638 664647723 const int s_res2 = ascii_wpage[sbeg_local[2]];
1639 664647723 const int s_res3 = ascii_wpage[sbeg_local[3]];
1640
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 615 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1510 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 280511138 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 183 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 114 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 384134163 times.
664647723 assert(s_res0 != 0);
1641
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 615 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1510 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 280511138 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 183 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 114 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 384134163 times.
664647723 assert(s_res1 != 0);
1642
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 615 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1510 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 280511138 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 183 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 114 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 384134163 times.
664647723 assert(s_res2 != 0);
1643
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 615 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1510 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 280511138 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 183 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 114 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 384134163 times.
664647723 assert(s_res3 != 0);
1644
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 615 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 1510 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 280511138 times.
✗ Branch 15 not taken.
664647723 func(s_res0, /*is_level_separator=*/false);
1645
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 615 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 1510 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 280511138 times.
✗ Branch 15 not taken.
664645410 func(s_res1, /*is_level_separator=*/false);
1646
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 615 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 1510 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 280511138 times.
✗ Branch 15 not taken.
664645691 func(s_res2, /*is_level_separator=*/false);
1647
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 615 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 1510 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 280511138 times.
✗ Branch 15 not taken.
664645083 func(s_res3, /*is_level_separator=*/false);
1648 664646990 sbeg_local += sizeof(uint32);
1649 }
1650 303149507 sbeg = sbeg_local;
1651
1652 // Do a single code point in the generic path.
1653 303148049 s_res = next_raw();
1654
10/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 2231736 times.
✓ Branch 11 taken 4456184 times.
✓ Branch 12 taken 1116479 times.
✓ Branch 13 taken 3344112 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 69816828 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 50 times.
✓ Branch 27 taken 151 times.
✓ Branch 28 taken 12 times.
✓ Branch 29 taken 36 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 222182461 times.
303148049 if (s_res == 0) {
1655 // Level separator, so we have to update our page pointer.
1656 3348277 ascii_wpage += UCA900_DISTANCE_BETWEEN_LEVELS;
1657 }
1658
37/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 5572074 times.
✓ Branch 41 taken 1115846 times.
✓ Branch 42 taken 5572074 times.
✗ Branch 43 not taken.
✓ Branch 44 taken 62 times.
✓ Branch 45 taken 5572012 times.
✓ Branch 46 taken 1115908 times.
✓ Branch 47 taken 5572012 times.
✓ Branch 48 taken 3344139 times.
✓ Branch 49 taken 1116452 times.
✓ Branch 50 taken 3344139 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 62 times.
✓ Branch 53 taken 3344077 times.
✓ Branch 54 taken 1116514 times.
✓ Branch 55 taken 3344077 times.
✓ Branch 56 taken 40310830 times.
✓ Branch 57 taken 29505998 times.
✓ Branch 58 taken 40310766 times.
✗ Branch 59 not taken.
✓ Branch 60 taken 2503552 times.
✓ Branch 61 taken 37807214 times.
✓ Branch 62 taken 32009568 times.
✓ Branch 63 taken 37807196 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 176 times.
✓ Branch 95 taken 25 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 176 times.
✓ Branch 98 taken 25 times.
✓ Branch 99 taken 176 times.
✓ Branch 100 taken 36 times.
✓ Branch 101 taken 12 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 36 times.
✓ Branch 104 taken 12 times.
✓ Branch 105 taken 36 times.
✓ Branch 106 taken 128003707 times.
✓ Branch 107 taken 94178754 times.
✓ Branch 108 taken 1340 times.
✓ Branch 109 taken 128003800 times.
✓ Branch 110 taken 94178842 times.
✓ Branch 111 taken 128005052 times.
303148049 if (s_res < 0 || !func(s_res, s_res == 0)) return;
1659 }
1660 }
1661
1662 /**
1663 Change a weight according to the reorder parameters.
1664 @param weight The weight to change
1665 @return The reordered weight
1666 */
1667 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1668 66337916 uint16 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::apply_reorder_param(
1669 uint16 weight) {
1670 /*
1671 Chinese collation's reordering is done in next_implicit() and
1672 modify_all_zh_pages(). See the comment on zh_reorder_param and
1673 change_zh_implicit().
1674 */
1675
2/2
✓ Branch 0 taken 2167391 times.
✓ Branch 1 taken 31001567 times.
66337916 if (cs->coll_param == &zh_coll_param) return weight;
1676 62003134 const Reorder_param *param = cs->coll_param->reorder_param;
1677
4/4
✓ Branch 0 taken 30878566 times.
✓ Branch 1 taken 123001 times.
✓ Branch 2 taken 263958 times.
✓ Branch 3 taken 30614608 times.
62003134 if (weight >= START_WEIGHT_TO_REORDER && weight <= param->max_weight) {
1678
1/2
✓ Branch 0 taken 729107 times.
✗ Branch 1 not taken.
1458214 for (int rec_ind = 0; rec_ind < param->wt_rec_num; ++rec_ind) {
1679 1458214 const Reorder_wt_rec *wt_rec = param->wt_rec + rec_ind;
1680
2/2
✓ Branch 0 taken 525120 times.
✓ Branch 1 taken 203987 times.
1458214 if (weight >= wt_rec->old_wt_bdy.begin &&
1681
2/2
✓ Branch 0 taken 263958 times.
✓ Branch 1 taken 261162 times.
1050240 weight <= wt_rec->old_wt_bdy.end) {
1682 /*
1683 As commented in adjust_japanese_weight(), if this is a Japanese
1684 collation, for characters whose weight is between Latin and Kana
1685 group, and for the characters whose weight is between Kana and
1686 Han, we need to change their weight to be after all Han
1687 characters. We decide to give them the weights [FB86 0000 0000]
1688 [origin weight] to make sure the new weights are greater than
1689 the maximum implicit weight of Han characters. If this character's
1690 origin weight has more than one non-ignorable primary weight, for
1691 example, [AAAA 0020 0002][BBBB 0020 0002], both AAAA and BBBB need
1692 to be changed. The new weight should be:
1693 [FB86 0000 0000][AAAA 0020 0002][FB86 0000 0000][BBBB 0020 0002].
1694 */
1695
4/4
✓ Branch 0 taken 212204 times.
✓ Branch 1 taken 51754 times.
✓ Branch 2 taken 201382 times.
✓ Branch 3 taken 10822 times.
527916 if (param == &ja_reorder_param && wt_rec->new_wt_bdy.begin == 0) {
1696 402764 return_origin_weight = !return_origin_weight;
1697
2/2
✓ Branch 0 taken 100684 times.
✓ Branch 1 taken 100698 times.
402764 if (return_origin_weight) break;
1698
1699 /*
1700 We didn't consume the weight; rewind the iterator, so we will
1701 get another call where we can output it.
1702 */
1703 201396 wbeg -= wbeg_stride;
1704 201396 ++num_of_ce_left;
1705 201396 return 0xFB86;
1706 }
1707
1708 // Regular (non-Japanese-specific) reordering.
1709 125152 return weight - wt_rec->old_wt_bdy.begin + wt_rec->new_wt_bdy.begin;
1710 }
1711 }
1712 }
1713 61676586 return weight;
1714 }
1715
1716 // See Unicode TR35 section 3.14.1.
1717 1141831 static bool is_tertiary_weight_upper_case(uint16 weight) {
1718
10/10
✓ Branch 0 taken 7280 times.
✓ Branch 1 taken 1134551 times.
✓ Branch 2 taken 2929 times.
✓ Branch 3 taken 4351 times.
✓ Branch 4 taken 1137405 times.
✓ Branch 5 taken 75 times.
✓ Branch 6 taken 1137326 times.
✓ Branch 7 taken 79 times.
✓ Branch 8 taken 1137213 times.
✓ Branch 9 taken 113 times.
1141831 if ((weight >= 0x08 && weight <= 0x0C) || weight == 0x0E || weight == 0x11 ||
1719
2/2
✓ Branch 0 taken 228 times.
✓ Branch 1 taken 1136985 times.
1137213 weight == 0x12 || weight == 0x1D)
1720 4846 return true;
1721 1136985 return false;
1722 }
1723
1724 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1725 8963834 uint16 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::apply_case_first(
1726 uint16 weight) {
1727 /*
1728 We only apply the case weight change here when the character is not
1729 tailored. A tailored character's case weight has been changed in
1730 my_char_weight_put_900().
1731 Only one collation (Danish) needs to implement [caseFirst upper].
1732 */
1733
5/6
✓ Branch 0 taken 4481917 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1137030 times.
✓ Branch 3 taken 3344887 times.
✓ Branch 4 taken 1136901 times.
✓ Branch 5 taken 129 times.
8963834 if (cs->coll_param->case_first == CASE_FIRST_UPPER && weight_lv == 2 &&
1734 weight < 0x20) {
1735
2/2
✓ Branch 0 taken 2381 times.
✓ Branch 1 taken 1134520 times.
2273802 if (is_tertiary_weight_upper_case(weight))
1736 4762 weight |= CASE_FIRST_UPPER_MASK;
1737 else
1738 2269040 weight |= CASE_FIRST_LOWER_MASK;
1739 }
1740 8963834 return weight;
1741 }
1742
1743 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1744 ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next() {
1745 32111082756 int res = next_raw();
1746 32111082756 Coll_param *param = cs->coll_param;
1747
56/160
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 4534216 times.
✓ Branch 17 taken 3342897 times.
✓ Branch 18 taken 4534216 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 134438036 times.
✓ Branch 21 taken 100270725 times.
✓ Branch 22 taken 53782790 times.
✓ Branch 23 taken 80655246 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 61819580 times.
✓ Branch 29 taken 31207113 times.
✓ Branch 30 taken 13244249 times.
✓ Branch 31 taken 48575331 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1146 times.
✓ Branch 49 taken 140 times.
✓ Branch 50 taken 1146 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 20607 times.
✓ Branch 53 taken 90 times.
✓ Branch 54 taken 8244 times.
✓ Branch 55 taken 12363 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 6414 times.
✓ Branch 61 taken 28 times.
✓ Branch 62 taken 1374 times.
✓ Branch 63 taken 5040 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 564 times.
✓ Branch 113 taken 162 times.
✓ Branch 114 taken 564 times.
✗ Branch 115 not taken.
✓ Branch 116 taken 563 times.
✓ Branch 117 taken 163 times.
✓ Branch 118 taken 563 times.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 6648 times.
✓ Branch 125 taken 2059 times.
✓ Branch 126 taken 472 times.
✓ Branch 127 taken 6176 times.
✓ Branch 128 taken 6615 times.
✓ Branch 129 taken 2092 times.
✓ Branch 130 taken 456 times.
✓ Branch 131 taken 6159 times.
✓ Branch 132 taken 19 times.
✓ Branch 133 taken 13 times.
✗ Branch 134 not taken.
✓ Branch 135 taken 19 times.
✓ Branch 136 taken 14426 times.
✓ Branch 137 taken 1823 times.
✗ Branch 138 not taken.
✓ Branch 139 taken 14426 times.
✓ Branch 140 taken 14417 times.
✓ Branch 141 taken 1832 times.
✗ Branch 142 not taken.
✓ Branch 143 taken 14417 times.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✓ Branch 148 taken 15650607515 times.
✓ Branch 149 taken 233290721 times.
✓ Branch 150 taken 783 times.
✓ Branch 151 taken 15650606732 times.
✓ Branch 152 taken 15654458129 times.
✓ Branch 153 taken 237029887 times.
✓ Branch 154 taken 800 times.
✓ Branch 155 taken 15654457329 times.
✓ Branch 156 taken 2517 times.
✓ Branch 157 taken 1599 times.
✗ Branch 158 not taken.
✓ Branch 159 taken 2517 times.
32111082756 if (res > 0 && param) {
1748 /* The reorder weight change is applied only on the primary level. */
1749
36/160
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 4534216 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 2254867 times.
✓ Branch 19 taken 2279349 times.
✓ Branch 20 taken 35859705 times.
✓ Branch 21 taken 17923085 times.
✓ Branch 22 taken 17664393 times.
✓ Branch 23 taken 18195312 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 13244249 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 13244249 times.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1146 times.
✗ Branch 49 not taken.
✓ Branch 50 taken 335 times.
✓ Branch 51 taken 811 times.
✓ Branch 52 taken 5496 times.
✓ Branch 53 taken 2748 times.
✓ Branch 54 taken 1832 times.
✓ Branch 55 taken 3664 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 1374 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 1374 times.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 564 times.
✗ Branch 113 not taken.
✓ Branch 114 taken 149 times.
✓ Branch 115 taken 415 times.
✓ Branch 116 taken 563 times.
✗ Branch 117 not taken.
✓ Branch 118 taken 148 times.
✓ Branch 119 taken 415 times.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 41 times.
✓ Branch 125 taken 431 times.
✓ Branch 126 taken 14 times.
✓ Branch 127 taken 27 times.
✓ Branch 128 taken 45 times.
✓ Branch 129 taken 411 times.
✓ Branch 130 taken 14 times.
✓ Branch 131 taken 31 times.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✓ Branch 148 taken 783 times.
✗ Branch 149 not taken.
✓ Branch 150 taken 783 times.
✗ Branch 151 not taken.
✓ Branch 152 taken 800 times.
✗ Branch 153 not taken.
✓ Branch 154 taken 800 times.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
71575657 if (param->reorder_param && weight_lv == 0) res = apply_reorder_param(res);
1750
16/80
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 4534216 times.
✓ Branch 10 taken 4480937 times.
✓ Branch 11 taken 49301853 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 13244249 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 1146 times.
✓ Branch 26 taken 687 times.
✓ Branch 27 taken 7557 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 1374 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✓ Branch 57 taken 564 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 563 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✓ Branch 62 taken 151 times.
✓ Branch 63 taken 321 times.
✓ Branch 64 taken 142 times.
✓ Branch 65 taken 314 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✓ Branch 75 taken 783 times.
✗ Branch 76 not taken.
✓ Branch 77 taken 800 times.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
71575657 if (param->case_first != CASE_FIRST_OFF) res = apply_case_first(res);
1751 }
1752 32111082756 return res;
1753 }
1754
1755 /*
1756 Compares two strings according to the collation
1757
1758 SYNOPSIS:
1759 my_strnncoll_uca()
1760 cs Character set information
1761 s First string
1762 slen First string length
1763 t Second string
1764 tlen Second string length
1765
1766 NOTES:
1767 Initializes two weight scanners and gets weights
1768 corresponding to two strings in a loop. If weights are not
1769 the same at some step then returns their difference.
1770
1771 In the while() comparison these situations are possible:
1772 1. (s_res>0) and (t_res>0) and (s_res == t_res)
1773 Weights are the same so far, continue comparison
1774 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
1775 A difference has been found, return.
1776 3. (s_res>0) and (t_res<0)
1777 We have reached the end of the second string, or found
1778 an illegal multibyte sequence in the second string.
1779 Return a positive number, i.e. the first string is bigger.
1780 4. (s_res<0) and (t_res>0)
1781 We have reached the end of the first string, or found
1782 an illegal multibyte sequence in the first string.
1783 Return a negative number, i.e. the second string is bigger.
1784 5. (s_res<0) and (t_res<0)
1785 Both scanners returned -1. It means we have reached
1786 the end-of-string or an illegal sequence in both strings
1787 at the same time. Return 0, strings are equal.
1788
1789 RETURN
1790 Difference between two strings, according to the collation:
1791 0 - means strings are equal
1792 negative number - means the first string is smaller
1793 positive number - means the first string is bigger
1794 */
1795
1796 template <class Scanner, int LEVELS_FOR_COMPARE, class Mb_wc>
1797 10137428786 static int my_strnncoll_uca(const CHARSET_INFO *cs, const Mb_wc mb_wc,
1798 const uchar *s, size_t slen, const uchar *t,
1799 size_t tlen, bool t_is_prefix) {
1800 10137428786 Scanner sscanner(mb_wc, cs, s, slen);
1801 10137127820 Scanner tscanner(mb_wc, cs, t, tlen);
1802 10137115710 int s_res = 0;
1803 10137115710 int t_res = 0;
1804
1805 /*
1806 We compare the two strings on the same level first. If only string A's
1807 scanner has gone to the next level, it means that the other string, B,
1808 has more weights on the current level than A. We'll compare B's
1809 remaining weights with space.
1810 */
1811
2/2
✓ Branch 0 taken 5068533939 times.
✓ Branch 1 taken 26719 times.
10137121316 for (uint current_lv = 0; current_lv < LEVELS_FOR_COMPARE; ++current_lv) {
1812 /* Run the scanners until one of them runs out of current lv */
1813 do {
1814
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 182 times.
31767848200 s_res = sscanner.next();
1815 31783027760 t_res = tscanner.next();
1816
4/4
✓ Branch 0 taken 10825363536 times.
✓ Branch 1 taken 233227620 times.
✓ Branch 2 taken 10818634022 times.
✓ Branch 3 taken 3497258 times.
43761444872 } while (s_res == t_res && s_res >= 0 &&
1817
5/6
✓ Branch 0 taken 11058591156 times.
✓ Branch 1 taken 4832922724 times.
✓ Branch 2 taken 10816845624 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 10816814719 times.
✓ Branch 5 taken 5068657911 times.
75538854644 sscanner.get_weight_level() == current_lv &&
1818 21637268044 tscanner.get_weight_level() == current_lv);
1819
1820 /*
1821 Two scanners run to next level at same time, or we found a difference,
1822 or we found an error.
1823 */
1824
2/2
✓ Branch 0 taken 5066398992 times.
✓ Branch 1 taken 2161304 times.
10137315822 if (sscanner.get_weight_level() == tscanner.get_weight_level()) {
1825
4/4
✓ Branch 0 taken 235242215 times.
✓ Branch 1 taken 4831156777 times.
✓ Branch 2 taken 2793 times.
✓ Branch 3 taken 235239422 times.
10132797984 if (s_res == t_res && s_res >= 0) continue;
1826 10132792398 break; // Error or inequality found, end.
1827 }
1828
1829
2/2
✓ Branch 0 taken 877804 times.
✓ Branch 1 taken 1283500 times.
4322608 if (tscanner.get_weight_level() > current_lv) {
1830 // t ran out of weights on this level, and s didn't.
1831
2/2
✓ Branch 0 taken 1612 times.
✓ Branch 1 taken 876192 times.
1755608 if (t_is_prefix) {
1832 // Consume the rest of the weights from s.
1833 do {
1834 8296 s_res = sscanner.next();
1835
6/6
✓ Branch 0 taken 2546 times.
✓ Branch 1 taken 1602 times.
✓ Branch 2 taken 2536 times.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 2536 times.
✓ Branch 5 taken 1612 times.
8296 } while (s_res >= 0 && sscanner.get_weight_level() == current_lv);
1836
1837
2/2
✓ Branch 0 taken 1602 times.
✓ Branch 1 taken 10 times.
3224 if (s_res < 0) break; // Error found, end.
1838
1839 // s is now also on the next level. Continue comparison.
1840 20 continue;
1841 } else {
1842 // s is longer than t (and t_prefix isn't set).
1843 1752384 return 1;
1844 }
1845 }
1846
1847
1/2
✓ Branch 0 taken 1283500 times.
✗ Branch 1 not taken.
2567000 if (sscanner.get_weight_level() > current_lv) {
1848 // s ran out of weights on this level, and t didn't.
1849 2567000 return -1;
1850 }
1851
1852 break;
1853 }
1854
1855 10132849040 return (s_res - t_res);
1856 }
1857
1858 769377 static inline int my_space_weight(const CHARSET_INFO *cs) /* W3-TODO */
1859 {
1860
2/4
✓ Branch 0 taken 769377 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 769377 times.
769377 if (cs->uca && cs->uca->version == UCA_V900)
1861 return UCA900_WEIGHT(cs->uca->weights[0], /*weight_lv=*/0, 0x20);
1862 else
1863 769377 return cs->uca->weights[0][0x20 * cs->uca->lengths[0]];
1864 }
1865
1866 /**
1867 Helper function:
1868 Find address of weights of the given code point.
1869
1870 @param uca Pointer to UCA data
1871 @param wc character Unicode code point
1872
1873 @return Weight array
1874 @retval pointer to weight array for the given code point,
1875 or nullptr if this page does not have implicit weights.
1876 */
1877
1878 940230 static inline uint16 *my_char_weight_addr(MY_UCA_INFO *uca, my_wc_t wc) {
1879 uint page, ofst;
1880
1/2
✓ Branch 0 taken 940230 times.
✗ Branch 1 not taken.
1880460 return wc > uca->maxchar ? nullptr
1881 940230 : (uca->weights[page = (wc >> 8)]
1882
1/2
✓ Branch 0 taken 940230 times.
✗ Branch 1 not taken.
940230 ? uca->weights[page] + (ofst = (wc & 0xFF)) *
1883 940230 uca->lengths[page]
1884 940230 : nullptr);
1885 }
1886
1887 /**
1888 Helper function:
1889 Find address of weights of the given code point, for UCA 9.0.0 format.
1890
1891 @param uca Pointer to UCA data
1892 @param wc character Unicode code point
1893
1894 @return Weight array
1895 @retval pointer to weight array for the given code point,
1896 or nullptr if this page does not have implicit weights.
1897 */
1898
1899 344245426 static inline uint16 *my_char_weight_addr_900(MY_UCA_INFO *uca, my_wc_t wc) {
1900
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 344245426 times.
344245426 if (wc > uca->maxchar) return nullptr;
1901
1902 344245426 uint page = wc >> 8;
1903 344245426 uint ofst = wc & 0xFF;
1904 344245426 uint16 *weights = uca->weights[page];
1905
2/2
✓ Branch 0 taken 344241812 times.
✓ Branch 1 taken 3614 times.
344245426 if (weights)
1906 344241812 return UCA900_WEIGHT_ADDR(weights, /*level=*/0, ofst);
1907 else
1908 3614 return nullptr;
1909 }
1910
1911 /*
1912 Compares two strings according to the collation,
1913 ignoring trailing spaces.
1914
1915 SYNOPSIS:
1916 my_strnncollsp_uca()
1917 cs Character set information
1918 s First string
1919 slen First string length
1920 t Second string
1921 tlen Second string length
1922
1923 NOTES:
1924 Works exactly the same with my_strnncoll_uca(),
1925 but ignores trailing spaces.
1926
1927 In the while() comparison these situations are possible:
1928 1. (s_res>0) and (t_res>0) and (s_res == t_res)
1929 Weights are the same so far, continue comparison
1930 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
1931 A difference has been found, return.
1932 3. (s_res>0) and (t_res<0)
1933 We have reached the end of the second string, or found
1934 an illegal multibyte sequence in the second string.
1935 Compare the first string to an infinite array of
1936 space characters until difference is found, or until
1937 the end of the first string.
1938 4. (s_res<0) and (t_res>0)
1939 We have reached the end of the first string, or found
1940 an illegal multibyte sequence in the first string.
1941 Compare the second string to an infinite array of
1942 space characters until difference is found or until
1943 the end of the second string.
1944 5. (s_res<0) and (t_res<0)
1945 Both scanners returned -1. It means we have reached
1946 the end-of-string or an illegal sequence in both strings
1947 at the same time. Return 0, strings are equal.
1948
1949 RETURN
1950 Difference between two strings, according to the collation:
1951 0 - means strings are equal
1952 negative number - means the first string is smaller
1953 positive number - means the first string is bigger
1954 */
1955
1956 template <class Mb_wc>
1957 572150 static int my_strnncollsp_uca(const CHARSET_INFO *cs, Mb_wc mb_wc,
1958 const uchar *s, size_t slen, const uchar *t,
1959 size_t tlen) {
1960 int s_res, t_res;
1961
1962 572150 uca_scanner_any<Mb_wc> sscanner(mb_wc, cs, s, slen);
1963 571952 uca_scanner_any<Mb_wc> tscanner(mb_wc, cs, t, tlen);
1964
1965 do {
1966
2/2
✓ Branch 0 taken 105108 times.
✓ Branch 1 taken 7522667 times.
15255550 s_res = sscanner.next();
1967 15276552 t_res = tscanner.next();
1968
4/4
✓ Branch 0 taken 7598545 times.
✓ Branch 1 taken 39731 times.
✓ Branch 2 taken 7352080 times.
✓ Branch 3 taken 246465 times.
15276552 } while (s_res == t_res && s_res > 0);
1969
1970
4/4
✓ Branch 0 taken 32096 times.
✓ Branch 1 taken 254100 times.
✓ Branch 2 taken 438 times.
✓ Branch 3 taken 31658 times.
572392 if (s_res > 0 && t_res < 0) {
1971 /* Calculate weight for SPACE character */
1972 876 t_res = my_space_weight(cs);
1973
1974 /* compare the first string to spaces */
1975 do {
1976
2/2
✓ Branch 0 taken 201 times.
✓ Branch 1 taken 376 times.
1154 if (s_res != t_res) return (s_res - t_res);
1977 752 s_res = sscanner.next();
1978
2/2
✓ Branch 0 taken 139 times.
✓ Branch 1 taken 237 times.
752 } while (s_res > 0);
1979 474 return 0;
1980 }
1981
1982
4/4
✓ Branch 0 taken 254099 times.
✓ Branch 1 taken 31659 times.
✓ Branch 2 taken 6213 times.
✓ Branch 3 taken 247886 times.
571516 if (s_res < 0 && t_res > 0) {
1983 /* Calculate weight for SPACE character */
1984 12426 s_res = my_space_weight(cs);
1985
1986 /* compare the second string to spaces */
1987 do {
1988
2/2
✓ Branch 0 taken 5775 times.
✓ Branch 1 taken 1448 times.
14446 if (s_res != t_res) return (s_res - t_res);
1989 2896 t_res = tscanner.next();
1990
2/2
✓ Branch 0 taken 1010 times.
✓ Branch 1 taken 438 times.
2896 } while (t_res > 0);
1991 876 return 0;
1992 }
1993
1994 559090 return (s_res - t_res);
1995 }
1996
1997 /*
1998 Calculates hash value for the given string,
1999 according to the collation, and ignoring trailing spaces.
2000
2001 SYNOPSIS:
2002 my_hash_sort_uca()
2003 cs Character set information
2004 s String
2005 slen String's length
2006 n1 First hash parameter
2007 n2 Second hash parameter
2008
2009 NOTES:
2010 Scans weights consecutively and updates
2011 hash parameters n1 and n2. In a case insensitive collation,
2012 upper and lower case of the same letter will return the same
2013 weight sequence, and thus will produce the same hash values
2014 in n1 and n2.
2015
2016 RETURN
2017 N/A
2018 */
2019
2020 template <class Mb_wc>
2021 7096 static void my_hash_sort_uca(const CHARSET_INFO *cs, Mb_wc mb_wc,
2022 const uchar *s, size_t slen, uint64 *n1,
2023 uint64 *n2) {
2024 int s_res;
2025 uint64 tmp1;
2026 uint64 tmp2;
2027
2028
1/2
✓ Branch 0 taken 3548 times.
✗ Branch 1 not taken.
7096 slen = cs->cset->lengthsp(cs, pointer_cast<const char *>(s), slen);
2029 7096 uca_scanner_any<Mb_wc> scanner(mb_wc, cs, s, slen);
2030
2031 7096 tmp1 = *n1;
2032 7096 tmp2 = *n2;
2033
2034
2/2
✓ Branch 0 taken 87909 times.
✓ Branch 1 taken 3548 times.
190010 while ((s_res = scanner.next()) > 0) {
2035 175818 tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res >> 8)) + (tmp1 << 8);
2036 175818 tmp2 += 3;
2037 175818 tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res & 0xFF)) + (tmp1 << 8);
2038 175818 tmp2 += 3;
2039 }
2040
2041 7096 *n1 = tmp1;
2042 7096 *n2 = tmp2;
2043 7096 }
2044
2045 /*
2046 For the given string creates its "binary image", suitable
2047 to be used in binary comparison, i.e. in memcmp().
2048
2049 SYNOPSIS:
2050 my_strnxfrm_uca()
2051 cs Character set information
2052 dst Where to write the image
2053 dstlen Space available for the image, in bytes
2054 src The source string
2055 srclen Length of the source string, in bytes
2056
2057 NOTES:
2058 In a loop, scans weights from the source string and writes
2059 them into the binary image. In a case insensitive collation,
2060 upper and lower cases of the same letter will produce the
2061 same image subsequences. When we have reached the end-of-string
2062 or found an illegal multibyte sequence, the loop stops.
2063
2064 It is impossible to restore the original string using its
2065 binary image.
2066
2067 Binary images are used for bulk comparison purposes,
2068 e.g. in ORDER BY, when it is more efficient to create
2069 a binary image and use it instead of weight scanner
2070 for the original strings for every comparison.
2071
2072 RETURN
2073 Number of bytes that have been written into the binary image.
2074 */
2075
2076 template <class Mb_wc>
2077 169520438 static size_t my_strnxfrm_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, uchar *dst,
2078 size_t dstlen, uint num_codepoints,
2079 const uchar *src, size_t srclen, uint flags) {
2080 169520438 uchar *d0 = dst;
2081 169520438 uchar *de = dst + dstlen;
2082 int s_res;
2083 169520438 uca_scanner_any<Mb_wc> scanner(mb_wc, cs, src, srclen);
2084
2085
6/6
✓ Branch 0 taken 180611587 times.
✓ Branch 1 taken 248 times.
✓ Branch 2 taken 95851616 times.
✓ Branch 3 taken 84759971 times.
✓ Branch 4 taken 95851616 times.
✓ Branch 5 taken 84760219 times.
722446844 while (dst < de && (s_res = scanner.next()) > 0) {
2086 191703232 *dst++ = s_res >> 8;
2087
1/2
✓ Branch 0 taken 95851616 times.
✗ Branch 1 not taken.
191703232 if (dst < de) *dst++ = s_res & 0xFF;
2088 }
2089
2090
2/2
✓ Branch 0 taken 84759971 times.
✓ Branch 1 taken 248 times.
169520438 if (dst < de) {
2091 /*
2092 PAD SPACE behavior.
2093
2094 We still have space left in the output buffer, which must mean
2095 that the scanner is at the end of the last level. Find out
2096 how many weights we wrote per level, and add any remaining
2097 spaces we need to get us up to the requested total.
2098 */
2099
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 84759971 times.
169519942 assert(num_codepoints >= scanner.get_char_index());
2100 169519942 num_codepoints -= scanner.get_char_index();
2101
2102
2/2
✓ Branch 0 taken 429464 times.
✓ Branch 1 taken 84330507 times.
169519942 if (num_codepoints) {
2103 858928 uint space_count = std::min<uint>((de - dst) / 2, num_codepoints);
2104 858928 s_res = my_space_weight(cs);
2105
2/2
✓ Branch 0 taken 1478369 times.
✓ Branch 1 taken 429464 times.
3815666 for (; space_count; space_count--) {
2106 5913476 dst = store16be(dst, s_res);
2107 }
2108 }
2109 }
2110
4/4
✓ Branch 0 taken 333471 times.
✓ Branch 1 taken 84426748 times.
✓ Branch 2 taken 333262 times.
✓ Branch 3 taken 209 times.
169520438 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) {
2111 666524 s_res = my_space_weight(cs);
2112
2/2
✓ Branch 0 taken 18508219 times.
✓ Branch 1 taken 333262 times.
37682962 for (; dst < de;) {
2113 37016438 *dst++ = s_res >> 8;
2114
1/2
✓ Branch 0 taken 18508219 times.
✗ Branch 1 not taken.
37016438 if (dst < de) *dst++ = s_res & 0xFF;
2115 }
2116 }
2117 169520438 return dst - d0;
2118 }
2119
2120 171893211 static int my_uca_charcmp_900(const CHARSET_INFO *cs, my_wc_t wc1,
2121 my_wc_t wc2) {
2122 171893211 uint16 *weight1_ptr = my_char_weight_addr_900(cs->uca, wc1); /* W3-TODO */
2123 171893211 uint16 *weight2_ptr = my_char_weight_addr_900(cs->uca, wc2);
2124
2125 /* Check if either of the characters does not have implicit weights */
2126
4/4
✓ Branch 0 taken 171892076 times.
✓ Branch 1 taken 1135 times.
✓ Branch 2 taken 1812 times.
✓ Branch 3 taken 171890264 times.
171893211 if (!weight1_ptr || !weight2_ptr) return wc1 != wc2;
2127
2128
5/6
✓ Branch 0 taken 171890047 times.
✓ Branch 1 taken 217 times.
✓ Branch 2 taken 171890047 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 170012081 times.
✓ Branch 5 taken 1877966 times.
171890264 if (weight1_ptr[0] && weight2_ptr[0] && weight1_ptr[0] != weight2_ptr[0])
2129 170012081 return 1;
2130
2131 /* Thoroughly compare all weights */
2132 1878183 size_t length1 = weight1_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS];
2133 1878183 size_t length2 = weight2_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS];
2134
2135
2/2
✓ Branch 0 taken 1878222 times.
✓ Branch 1 taken 1877954 times.
3756176 for (int level = 0; level < cs->levels_for_compare; ++level) {
2136 1878222 size_t wt_ind1 = 0;
2137 1878222 size_t wt_ind2 = 0;
2138 1878222 uint16 *weight1 = weight1_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS;
2139 1878222 uint16 *weight2 = weight2_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS;
2140
4/4
✓ Branch 0 taken 1878294 times.
✓ Branch 1 taken 1877945 times.
✓ Branch 2 taken 1878260 times.
✓ Branch 3 taken 34 times.
3756239 while (wt_ind1 < length1 && wt_ind2 < length2) {
2141 // Zero weight is ignorable.
2142
4/4
✓ Branch 0 taken 1878260 times.
✓ Branch 1 taken 238 times.
✓ Branch 2 taken 238 times.
✓ Branch 3 taken 1878022 times.
1878498 for (; wt_ind1 < length1 && !*weight1; wt_ind1++)
2143 238 weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2144
2/2
✓ Branch 0 taken 238 times.
✓ Branch 1 taken 1878022 times.
1878260 if (wt_ind1 == length1) break;
2145
2/4
✓ Branch 0 taken 1878022 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1878022 times.
1878022 for (; wt_ind2 < length2 && !*weight2; wt_ind2++)
2146 weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1878022 times.
1878022 if (wt_ind2 == length2) break;
2148
2149 // Check if these two non-ignorable weights are equal.
2150
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1878017 times.
1878022 if (*weight1 != *weight2) return 1;
2151 1878017 wt_ind1++;
2152 1878017 wt_ind2++;
2153 1878017 weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2154 1878017 weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2155 }
2156 /*
2157 If either character is out of weights but we have equality so far,
2158 check if the other character has any non-ignorable weights left.
2159 */
2160
2/2
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 1878215 times.
1878249 for (; wt_ind1 < length1; wt_ind1++) {
2161
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 32 times.
34 if (*weight1) return 1;
2162 32 weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2163 }
2164
2/2
✓ Branch 0 taken 249 times.
✓ Branch 1 taken 1877993 times.
1878242 for (; wt_ind2 < length2; wt_ind2++) {
2165
2/2
✓ Branch 0 taken 222 times.
✓ Branch 1 taken 27 times.
249 if (*weight2) return 1;
2166 27 weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2167 }
2168 }
2169 1877954 return 0;
2170 }
2171
2172 /*
2173 This function compares if two code points are the same.
2174 The sign +1 or -1 does not matter. The only
2175 important thing is that the result is 0 or not 0.
2176 This fact allows us to use memcmp() safely, on both
2177 little-endian and big-endian machines.
2178 */
2179
2180 211214580 static int my_uca_charcmp(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) {
2181
2/2
✓ Branch 0 taken 39319260 times.
✓ Branch 1 taken 171895320 times.
211214580 if (wc1 == wc2) return 0;
2182
2183
3/4
✓ Branch 0 taken 171895320 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 171893211 times.
✓ Branch 3 taken 2109 times.
171895320 if (cs->uca != nullptr && cs->uca->version == UCA_V900)
2184 171893211 return my_uca_charcmp_900(cs, wc1, wc2);
2185
2186 size_t length1, length2;
2187 2109 uint16 *weight1 = my_char_weight_addr(cs->uca, wc1); /* W3-TODO */
2188 2109 uint16 *weight2 = my_char_weight_addr(cs->uca, wc2);
2189
2190 /* Check if either of the code points does not have implicit weights */
2191
2/4
✓ Branch 0 taken 2109 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2109 times.
2109 if (!weight1 || !weight2) return wc1 != wc2;
2192
2193 /* Quickly compare first weights */
2194
2/2
✓ Branch 0 taken 2073 times.
✓ Branch 1 taken 36 times.
2109 if (weight1[0] != weight2[0]) return 1;
2195
2196 /* Thoroughly compare all weights */
2197 36 length1 = cs->uca->lengths[wc1 >> MY_UCA_PSHIFT]; /* W3-TODO */
2198 36 length2 = cs->uca->lengths[wc2 >> MY_UCA_PSHIFT];
2199
2200
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (length1 > length2)
2201 return memcmp((const void *)weight1, (const void *)weight2, length2 * 2)
2202 ? 1
2203 : weight1[length2];
2204
2205
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (length1 < length2)
2206 return memcmp((const void *)weight1, (const void *)weight2, length1 * 2)
2207 ? 1
2208 : weight2[length1];
2209
2210 36 return memcmp((const void *)weight1, (const void *)weight2, length1 * 2);
2211 }
2212
2213 /*** Compare string against string with wildcard
2214 ** 0 if matched
2215 ** -1 if not matched with wildcard
2216 ** 1 if not matched
2217 */
2218
2219 154705681 static int my_wildcmp_uca_impl(const CHARSET_INFO *cs, const char *str,
2220 const char *str_end, const char *wildstr,
2221 const char *wildend, int escape, int w_one,
2222 int w_many, int recurse_level) {
2223
4/6
✓ Branch 0 taken 154705539 times.
✓ Branch 1 taken 142 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 154705539 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 154705681 times.
154705681 if (my_string_stack_guard && my_string_stack_guard(recurse_level)) return 1;
2224
2/2
✓ Branch 0 taken 154705566 times.
✓ Branch 1 taken 115 times.
154705681 while (wildstr != wildend) {
2225 154705566 int result = -1; /* Not found, using wildcards */
2226 154705566 auto mb_wc = cs->cset->mb_wc;
2227
2228 /*
2229 Compare the expression and pattern strings character-by-character until
2230 we find a '%' (w_many) in the pattern string. Once we do, we break out
2231 of the loop and try increasingly large widths for the '%' match,
2232 calling ourselves recursively until we find a match. (As an
2233 optimization, we test for the character immediately after '%' before we
2234 recurse.) This takes exponential time in the worst case.
2235
2236 Example: Say we are trying to match the pattern 'ab%cd' against the
2237 string 'ab..c.cd'. We first match the initial 'ab' against each other,
2238 and then see the '%' in the pattern. Since the first character after
2239 '%' is 'c', we skip to the first 'c' in the expression string, and try
2240 to match 'c.cd' against 'cd' by a recursive call. Since this failed, we
2241 scan for the next 'c', and try to match 'cd' against 'cd', which works.
2242 */
2243 my_wc_t w_wc;
2244 while (true) {
2245 int mb_len;
2246
1/2
✓ Branch 0 taken 197485191 times.
✗ Branch 1 not taken.
197485191 if ((mb_len = mb_wc(cs, &w_wc, (const uchar *)wildstr,
2247
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 197485191 times.
197485191 (const uchar *)wildend)) <= 0)
2248 153079399 return 1;
2249
2250 197485191 wildstr += mb_len;
2251 // If we found '%' (w_many), break out this loop.
2252
2/2
✓ Branch 0 taken 1626167 times.
✓ Branch 1 taken 195859024 times.
197485191 if (w_wc == (my_wc_t)w_many) {
2253 1626167 result = 1;
2254 1626167 break;
2255 }
2256
2257 /*
2258 If the character we just read was an escape character, skip it and
2259 read the next character instead. This character is used verbatim
2260 without checking if it is a wildcard (% or _). However, as a
2261 special exception, a lone escape character at the end of a string is
2262 treated as itself.
2263 */
2264 195859024 bool escaped = false;
2265
4/4
✓ Branch 0 taken 2626 times.
✓ Branch 1 taken 195856398 times.
✓ Branch 2 taken 2618 times.
✓ Branch 3 taken 8 times.
195859024 if (w_wc == (my_wc_t)escape && wildstr < wildend) {
2266
1/2
✓ Branch 0 taken 2618 times.
✗ Branch 1 not taken.
2618 if ((mb_len = mb_wc(cs, &w_wc, (const uchar *)wildstr,
2267
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2618 times.
2618 (const uchar *)wildend)) <= 0)
2268 return 1;
2269 2618 wildstr += mb_len;
2270 2618 escaped = true;
2271 }
2272
2273 my_wc_t s_wc;
2274
1/2
✓ Branch 0 taken 195859024 times.
✗ Branch 1 not taken.
195859024 if ((mb_len = mb_wc(cs, &s_wc, (const uchar *)str,
2275
2/2
✓ Branch 0 taken 19480 times.
✓ Branch 1 taken 195839544 times.
195859024 (const uchar *)str_end)) <= 0)
2276 19480 return 1;
2277 195839544 str += mb_len;
2278
2279 // If we found '_' (w_one), skip one character in expression string.
2280
4/4
✓ Branch 0 taken 195836942 times.
✓ Branch 1 taken 2602 times.
✓ Branch 2 taken 3570221 times.
✓ Branch 3 taken 192266721 times.
195839544 if (!escaped && w_wc == (my_wc_t)w_one) {
2281 3570221 result = 1;
2282 } else {
2283
2/2
✓ Branch 0 taken 152767422 times.
✓ Branch 1 taken 39501901 times.
192269323 if (my_uca_charcmp(cs, s_wc, w_wc)) return 1;
2284 }
2285
2/2
✓ Branch 0 taken 292497 times.
✓ Branch 1 taken 42779625 times.
43072122 if (wildstr == wildend)
2286 292497 return (str != str_end); /* Match if both are at end */
2287 42779625 }
2288
2289
1/2
✓ Branch 0 taken 1626167 times.
✗ Branch 1 not taken.
1626167 if (w_wc == (my_wc_t)w_many) {
2290 // Remove any '%' and '_' following w_many in the pattern string.
2291 for (;;) {
2292
2/2
✓ Branch 0 taken 542052 times.
✓ Branch 1 taken 1098741 times.
1640793 if (wildstr == wildend) {
2293 /*
2294 The previous w_many (%) was the last character in the pattern
2295 string, so we have a match no matter what the rest of the
2296 expression string looks like (even empty).
2297 */
2298 542052 return 0;
2299 }
2300 int mb_len_wild =
2301
1/2
✓ Branch 0 taken 1098741 times.
✗ Branch 1 not taken.
1098741 mb_wc(cs, &w_wc, (const uchar *)wildstr, (const uchar *)wildend);
2302
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1098741 times.
1098741 if (mb_len_wild <= 0) return 1;
2303 1098741 wildstr += mb_len_wild;
2304
2/2
✓ Branch 0 taken 2779 times.
✓ Branch 1 taken 1095962 times.
1098741 if (w_wc == (my_wc_t)w_many) continue;
2305
2306
2/2
✓ Branch 0 taken 11847 times.
✓ Branch 1 taken 1084115 times.
1095962 if (w_wc == (my_wc_t)w_one) {
2307 /*
2308 Skip one character in expression string because '_' needs to
2309 match one.
2310 */
2311 my_wc_t s_wc;
2312 int mb_len =
2313
1/2
✓ Branch 0 taken 11847 times.
✗ Branch 1 not taken.
11847 mb_wc(cs, &s_wc, (const uchar *)str, (const uchar *)str_end);
2314
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11847 times.
11847 if (mb_len <= 0) return 1;
2315 11847 str += mb_len;
2316 11847 continue;
2317 11847 }
2318 1084115 break; /* Not a wild character */
2319 14626 }
2320
2321 // No character in the expression string to match w_wc.
2322
2/2
✓ Branch 0 taken 697 times.
✓ Branch 1 taken 1083418 times.
1084115 if (str == str_end) return -1;
2323
2324 // Skip the escape character ('\') in the pattern if needed.
2325
4/4
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 1083298 times.
✓ Branch 2 taken 113 times.
✓ Branch 3 taken 7 times.
1083418 if (w_wc == (my_wc_t)escape && wildstr < wildend) {
2326 int mb_len =
2327
1/2
✓ Branch 0 taken 113 times.
✗ Branch 1 not taken.
113 mb_wc(cs, &w_wc, (const uchar *)wildstr, (const uchar *)wildend);
2328
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 113 times.
113 if (mb_len <= 0) return 1;
2329 113 wildstr += mb_len;
2330 }
2331
2332 /*
2333 w_wc is now the character following w_many (e.g., if the pattern is
2334 "a%c", w_wc is 'c').
2335 */
2336 while (true) {
2337 /*
2338 Skip until we find a character in the expression string that is
2339 equal to w_wc.
2340 */
2341 2552921 int mb_len = 0;
2342
2/2
✓ Branch 0 taken 18945257 times.
✓ Branch 1 taken 857580 times.
19802837 while (str != str_end) {
2343 my_wc_t s_wc;
2344
1/2
✓ Branch 0 taken 18945257 times.
✗ Branch 1 not taken.
18945257 if ((mb_len = mb_wc(cs, &s_wc, (const uchar *)str,
2345
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18945257 times.
18945257 (const uchar *)str_end)) <= 0)
2346 return 1;
2347
2348
2/2
✓ Branch 0 taken 1695341 times.
✓ Branch 1 taken 17249916 times.
18945257 if (!my_uca_charcmp(cs, s_wc, w_wc)) break;
2349 17249916 str += mb_len;
2350 }
2351 // No character in the expression string is equal to w_wc.
2352
2/2
✓ Branch 0 taken 857580 times.
✓ Branch 1 taken 1695341 times.
2552921 if (str == str_end) return -1;
2353 1695341 str += mb_len;
2354
2355 /*
2356 The strings match up until the first character after w_many in the
2357 pattern string. For the rest part of pattern string and expression
2358 string, we recursively call to get wild compare result.
2359 Example, wildcmp(..., "abcdefg", "a%de%g", ...), we'll run again on
2360 wildcmp(..., "efg", "e%g", ...).
2361 */
2362
1/2
✓ Branch 0 taken 1695341 times.
✗ Branch 1 not taken.
1695341 result = my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape,
2363 w_one, w_many, recurse_level + 1);
2364
2365
2/2
✓ Branch 0 taken 225838 times.
✓ Branch 1 taken 1469503 times.
1695341 if (result <= 0) return result;
2366 1469503 }
2367 }
2368 }
2369
2/2
✓ Branch 0 taken 69 times.
✓ Branch 1 taken 46 times.
115 return (str != str_end ? 1 : 0);
2370 }
2371
2372 19926 static int my_strcasecmp_uca(const CHARSET_INFO *cs, const char *s,
2373 const char *t) {
2374 19926 const MY_UNICASE_INFO *uni_plane = cs->caseinfo;
2375 const MY_UNICASE_CHARACTER *page;
2376
4/4
✓ Branch 0 taken 40839 times.
✓ Branch 1 taken 3642 times.
✓ Branch 2 taken 40380 times.
✓ Branch 3 taken 459 times.
44481 while (s[0] && t[0]) {
2377 my_wc_t s_wc, t_wc;
2378
2379
1/2
✓ Branch 0 taken 40380 times.
✗ Branch 1 not taken.
40380 if (static_cast<uchar>(s[0]) < 128) {
2380 40380 s_wc = uni_plane->page[0][static_cast<uchar>(s[0])].tolower;
2381 40380 s++;
2382 } else {
2383 int res;
2384
2385 res = cs->cset->mb_wc(cs, &s_wc, pointer_cast<const uchar *>(s),
2386 pointer_cast<const uchar *>(s + 4));
2387
2388
0/2
✗ Branch 0 not taken.
✗ Branch 1 not taken.
15825 if (res <= 0) return strcmp(s, t);
2389 s += res;
2390 if (s_wc <= uni_plane->maxchar && (page = uni_plane->page[s_wc >> 8]))
2391 s_wc = page[s_wc & 0xFF].tolower;
2392 }
2393
2394 /* Do the same for the second string */
2395
2396
1/2
✓ Branch 0 taken 40380 times.
✗ Branch 1 not taken.
40380 if (static_cast<uchar>(t[0]) < 128) {
2397 /* Convert single byte character into weight */
2398 40380 t_wc = uni_plane->page[0][static_cast<uchar>(t[0])].tolower;
2399 40380 t++;
2400 } else {
2401 int res = cs->cset->mb_wc(cs, &t_wc, pointer_cast<const uchar *>(t),
2402 pointer_cast<const uchar *>(t + 4));
2403 if (res <= 0) return strcmp(s, t);
2404 t += res;
2405
2406 if (t_wc <= uni_plane->maxchar && (page = uni_plane->page[t_wc >> 8]))
2407 t_wc = page[t_wc & 0xFF].tolower;
2408 }
2409
2410 /* Now we have two weights, let's compare them */
2411
2/2
✓ Branch 0 taken 15825 times.
✓ Branch 1 taken 24555 times.
40380 if (s_wc != t_wc) return static_cast<int>(s_wc) - static_cast<int>(t_wc);
2412 }
2413 4101 return static_cast<int>(static_cast<uchar>(s[0])) -
2414 4101 static_cast<int>(static_cast<uchar>(t[0]));
2415 }
2416
2417 extern "C" {
2418 153010340 static int my_wildcmp_uca(const CHARSET_INFO *cs, const char *str,
2419 const char *str_end, const char *wildstr,
2420 const char *wildend, int escape, int w_one,
2421 int w_many) {
2422 153010340 return my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape, w_one,
2423 153010340 w_many, 1);
2424 }
2425 } // extern "C"
2426
2427 /*
2428 Collation language is implemented according to
2429 subset of ICU Collation Customization (tailorings):
2430 http://icu.sourceforge.net/userguide/Collate_Customization.html
2431
2432 Collation language elements:
2433 Delimiters:
2434 space - skipped
2435
2436 <char> := A-Z | a-z | \uXXXX
2437
2438 Shift command:
2439 <shift> := & - reset at this letter.
2440
2441 Diff command:
2442 <d1> := < - Identifies a primary difference.
2443 <d2> := << - Identifies a secondary difference.
2444 <d3> := <<< - Identifies a tertiary difference.
2445
2446
2447 Collation rules:
2448 <ruleset> := <rule> { <ruleset> }
2449
2450 <rule> := <d1> <string>
2451 | <d2> <string>
2452 | <d3> <string>
2453 | <shift> <char>
2454
2455 <string> := <char> [ <string> ]
2456
2457 An example, Polish collation:
2458
2459 &A < \u0105 <<< \u0104
2460 &C < \u0107 <<< \u0106
2461 &E < \u0119 <<< \u0118
2462 &L < \u0142 <<< \u0141
2463 &N < \u0144 <<< \u0143
2464 &O < \u00F3 <<< \u00D3
2465 &S < \u015B <<< \u015A
2466 &Z < \u017A <<< \u017B
2467 */
2468
2469 typedef enum my_coll_lexem_num_en {
2470 MY_COLL_LEXEM_EOF = 0,
2471 MY_COLL_LEXEM_SHIFT = 1,
2472 MY_COLL_LEXEM_RESET = 4,
2473 MY_COLL_LEXEM_CHAR = 5,
2474 MY_COLL_LEXEM_ERROR = 6,
2475 MY_COLL_LEXEM_OPTION = 7,
2476 MY_COLL_LEXEM_EXTEND = 8,
2477 MY_COLL_LEXEM_CONTEXT = 9
2478 } my_coll_lexem_num;
2479
2480 /**
2481 Convert collation customization lexem to string,
2482 for nice error reporting
2483
2484 @param term lexem code
2485
2486 @return lexem name
2487 */
2488
2489 1 static const char *my_coll_lexem_num_to_str(my_coll_lexem_num term) {
2490
1/9
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
1 switch (term) {
2491 case MY_COLL_LEXEM_EOF:
2492 return "EOF";
2493 1 case MY_COLL_LEXEM_SHIFT:
2494 1 return "Shift";
2495 case MY_COLL_LEXEM_RESET:
2496 return "&";
2497 case MY_COLL_LEXEM_CHAR:
2498 return "Character";
2499 case MY_COLL_LEXEM_OPTION:
2500 return "Bracket option";
2501 case MY_COLL_LEXEM_EXTEND:
2502 return "/";
2503 case MY_COLL_LEXEM_CONTEXT:
2504 return "|";
2505 case MY_COLL_LEXEM_ERROR:
2506 return "ERROR";
2507 }
2508 return nullptr;
2509 }
2510
2511 struct MY_COLL_LEXEM {
2512 my_coll_lexem_num term;
2513 const char *beg;
2514 const char *end;
2515 const char *prev;
2516 int diff;
2517 int code;
2518 };
2519
2520 /*
2521 Initialize collation rule lexical analyzer
2522
2523 SYNOPSIS
2524 my_coll_lexem_init
2525 lexem Lex analyzer to init
2526 str Const string to parse
2527 str_end End of the string
2528 USAGE
2529
2530 RETURN VALUES
2531 N/A
2532 */
2533
2534 26380 static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, const char *str,
2535 const char *str_end) {
2536 26380 lexem->beg = str;
2537 26380 lexem->prev = str;
2538 26380 lexem->end = str_end;
2539 26380 lexem->diff = 0;
2540 26380 lexem->code = 0;
2541 26380 }
2542
2543 /**
2544 Compare lexem to string with length
2545
2546 @param lexem lexem
2547 @param pattern string
2548 @param patternlen string length
2549
2550 @retval 0 if lexem is equal to string, non-0 otherwise.
2551 */
2552
2553 114801 static int lex_cmp(MY_COLL_LEXEM *lexem, const char *pattern,
2554 size_t patternlen) {
2555 114801 size_t lexemlen = lexem->beg - lexem->prev;
2556
2/2
✓ Branch 0 taken 57224 times.
✓ Branch 1 taken 57577 times.
114801 if (lexemlen < patternlen) return 1; /* Not a prefix */
2557 57577 return native_strncasecmp(lexem->prev, pattern, patternlen);
2558 }
2559
2560 /*
2561 Print collation customization expression parse error, with context.
2562
2563 SYNOPSIS
2564 my_coll_lexem_print_error
2565 lexem Lex analyzer to take context from
2566 errstr string to write error to
2567 errsize errstr size
2568 txt error message
2569 col_name collation name
2570 USAGE
2571
2572 RETURN VALUES
2573 N/A
2574 */
2575
2576 3 static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, char *errstr,
2577 size_t errsize, const char *txt,
2578 const char *col_name) {
2579 char tail[30];
2580 3 size_t len = lexem->end - lexem->prev;
2581
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 strmake(tail, lexem->prev, std::min(len, sizeof(tail) - 1));
2582 3 errstr[errsize - 1] = '\0';
2583 3 snprintf(errstr, errsize - 1, "%s at '%s' for COLLATION : %s",
2584
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 txt[0] ? txt : "Syntax error", tail, col_name);
2585 3 }
2586
2587 /*
2588 Convert a hex digit into its numeric value
2589
2590 SYNOPSIS
2591 ch2x
2592 ch hex digit to convert
2593 USAGE
2594
2595 RETURN VALUES
2596 an integer value in the range 0..15
2597 -1 on error
2598 */
2599
2600 6049741 static int ch2x(int ch) {
2601
4/4
✓ Branch 0 taken 5353184 times.
✓ Branch 1 taken 696557 times.
✓ Branch 2 taken 3253319 times.
✓ Branch 3 taken 2099865 times.
6049741 if (ch >= '0' && ch <= '9') return ch - '0';
2602
2603
4/4
✓ Branch 0 taken 190176 times.
✓ Branch 1 taken 2606246 times.
✓ Branch 2 taken 4074 times.
✓ Branch 3 taken 186102 times.
2796422 if (ch >= 'a' && ch <= 'f') return 10 + ch - 'a';
2604
2605
4/4
✓ Branch 0 taken 1790386 times.
✓ Branch 1 taken 1001962 times.
✓ Branch 2 taken 1593929 times.
✓ Branch 3 taken 196457 times.
2792348 if (ch >= 'A' && ch <= 'F') return 10 + ch - 'A';
2606
2607 1198419 return -1;
2608 }
2609
2610 /*
2611 Collation language lexical parser:
2612 Scans the next lexem.
2613
2614 SYNOPSIS
2615 my_coll_lexem_next
2616 lexem Lex analyzer, previously initialized by
2617 my_coll_lexem_init.
2618 USAGE
2619 Call this function in a loop
2620
2621 RETURN VALUES
2622 Lexem number: eof, diff, shift, char or error.
2623 */
2624
2625 3181669 static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) {
2626 const char *beg;
2627 my_coll_lexem_num rc;
2628
2629
2/2
✓ Branch 0 taken 4627380 times.
✓ Branch 1 taken 79135 times.
4706515 for (beg = lexem->beg; beg < lexem->end; beg++) {
2630
8/8
✓ Branch 0 taken 1524846 times.
✓ Branch 1 taken 23687 times.
✓ Branch 2 taken 267272 times.
✓ Branch 3 taken 243295 times.
✓ Branch 4 taken 51454 times.
✓ Branch 5 taken 186122 times.
✓ Branch 6 taken 730323 times.
✓ Branch 7 taken 1600381 times.
4627380 switch (*beg) {
2631 1524846 case ' ':
2632 case '\t':
2633 case '\r':
2634 case '\n':
2635 1524846 continue;
2636
2637 23687 case '[': /* Bracket expression, e.g. "[optimize [a-z]]" */
2638 {
2639 size_t nbrackets; /* Indicates nested recursion level */
2640
1/2
✓ Branch 0 taken 214376 times.
✗ Branch 1 not taken.
214376 for (beg++, nbrackets = 1; beg < lexem->end; beg++) {
2641
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 214376 times.
214376 if (*beg == '[') /* Enter nested bracket expression */
2642 nbrackets++;
2643
2/2
✓ Branch 0 taken 23687 times.
✓ Branch 1 taken 190689 times.
214376 else if (*beg == ']') {
2644
1/2
✓ Branch 0 taken 23687 times.
✗ Branch 1 not taken.
23687 if (--nbrackets == 0) {
2645 23687 rc = MY_COLL_LEXEM_OPTION;
2646 23687 beg++;
2647 23687 goto ex;
2648 }
2649 }
2650 }
2651 rc = MY_COLL_LEXEM_ERROR;
2652 goto ex;
2653 }
2654
2655 267272 case '&':
2656 267272 beg++;
2657 267272 rc = MY_COLL_LEXEM_RESET;
2658 267272 goto ex;
2659
2660 243295 case '=':
2661 243295 beg++;
2662 243295 lexem->diff = 0;
2663 243295 rc = MY_COLL_LEXEM_SHIFT;
2664 243295 goto ex;
2665
2666 51454 case '/':
2667 51454 beg++;
2668 51454 rc = MY_COLL_LEXEM_EXTEND;
2669 51454 goto ex;
2670
2671 186122 case '|':
2672 186122 beg++;
2673 186122 rc = MY_COLL_LEXEM_CONTEXT;
2674 186122 goto ex;
2675
2676 730323 case '<': /* Shift: '<' or '<<' or '<<<' or '<<<<' */
2677 {
2678 /* Scan up to 3 additional '<' characters */
2679 730323 for (beg++, lexem->diff = 1;
2680
4/6
✓ Branch 0 taken 1538561 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 808238 times.
✓ Branch 3 taken 730323 times.
✓ Branch 4 taken 808238 times.
✗ Branch 5 not taken.
1538561 (beg < lexem->end) && (*beg == '<') && (lexem->diff <= 3);
2681 808238 beg++, lexem->diff++)
2682 ;
2683 730323 rc = MY_COLL_LEXEM_SHIFT;
2684 730323 goto ex;
2685 }
2686 1600381 default:
2687 1600381 break;
2688 1524846 }
2689
2690 /* Escaped character, e.g. \u1234 */
2691
4/6
✓ Branch 0 taken 1210398 times.
✓ Branch 1 taken 389983 times.
✓ Branch 2 taken 1210398 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1210398 times.
✗ Branch 5 not taken.
1600381 if ((*beg == '\\') && (beg + 2 < lexem->end) && (beg[1] == 'u') &&
2692
2/2
✓ Branch 0 taken 1210396 times.
✓ Branch 1 taken 2 times.
1210398 my_isxdigit(&my_charset_utf8_general_ci, beg[2])) {
2693 int ch;
2694
2695 1210396 beg += 2;
2696 1210396 lexem->code = 0;
2697
6/6
✓ Branch 0 taken 6049741 times.
✓ Branch 1 taken 11977 times.
✓ Branch 2 taken 4851322 times.
✓ Branch 3 taken 1198419 times.
✓ Branch 4 taken 4851322 times.
✓ Branch 5 taken 1210396 times.
6061718 while ((beg < lexem->end) && ((ch = ch2x(beg[0])) >= 0)) {
2698 4851322 lexem->code = (lexem->code << 4) + ch;
2699 4851322 beg++;
2700 }
2701 1210396 rc = MY_COLL_LEXEM_CHAR;
2702 1210396 goto ex;
2703 }
2704
2705 /*
2706 Unescaped single byte character:
2707 allow printable ASCII range except SPACE and
2708 special characters parsed above []<&/|=
2709 */
2710
3/4
✓ Branch 0 taken 305873 times.
✓ Branch 1 taken 84112 times.
✓ Branch 2 taken 305873 times.
✗ Branch 3 not taken.
389985 if (*beg >= 0x21 && *beg <= 0x7E) {
2711 305873 lexem->code = *beg++;
2712 305873 rc = MY_COLL_LEXEM_CHAR;
2713 305873 goto ex;
2714 }
2715
2716
1/2
✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
84112 if (((uchar)*beg) > 0x7F) /* Unescaped multibyte character */
2717 {
2718 84112 CHARSET_INFO *cs = &my_charset_utf8_general_ci;
2719 my_wc_t wc;
2720
1/2
✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
84112 int nbytes = cs->cset->mb_wc(cs, &wc, pointer_cast<const uchar *>(beg),
2721 84112 pointer_cast<const uchar *>(lexem->end));
2722
1/2
✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
84112 if (nbytes > 0) {
2723 84112 rc = MY_COLL_LEXEM_CHAR;
2724 84112 beg += nbytes;
2725 84112 lexem->code = (int)wc;
2726 84112 goto ex;
2727 }
2728 }
2729
2730 rc = MY_COLL_LEXEM_ERROR;
2731 goto ex;
2732 }
2733 79135 rc = MY_COLL_LEXEM_EOF;
2734
2735 3181669 ex:
2736 3181669 lexem->prev = lexem->beg;
2737 3181669 lexem->beg = beg;
2738 3181669 lexem->term = rc;
2739 3181669 return rc;
2740 }
2741
2742 /*
2743 Collation rule item
2744 */
2745
2746 #define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */
2747
2748 struct MY_COLL_RULE {
2749 my_wc_t base[MY_UCA_MAX_EXPANSION]; /* Base character */
2750 my_wc_t curr[MY_UCA_MAX_CONTRACTION]; /* Current character */
2751 int diff[4]; /* Primary, Secondary, Tertiary, Quaternary difference */
2752 size_t before_level; /* "reset before" indicator */
2753 bool with_context;
2754 };
2755
2756 /**
2757 Return length of the "reset" string of a rule.
2758
2759 @param r Collation customization rule
2760
2761 @return Length of r->base
2762 */
2763
2764 1146168 static inline size_t my_coll_rule_reset_length(MY_COLL_RULE *r) {
2765 1146168 return my_wstrnlen(r->base, MY_UCA_MAX_EXPANSION);
2766 }
2767
2768 /**
2769 Return length of the "shift" string of a rule.
2770
2771 @param r Collation customization rule
2772
2773 @return Length of r->curr
2774 */
2775
2776 1146168 static inline size_t my_coll_rule_shift_length(MY_COLL_RULE *r) {
2777 1146168 return my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
2778 }
2779
2780 /**
2781 Append new character to the end of a 0-terminated wide string.
2782
2783 @param wc Wide string
2784 @param limit Maximum possible result length
2785 @param code Character to add
2786
2787 @return 1 if character was added, 0 if string was too long
2788 */
2789
2790 1600497 static int my_coll_rule_expand(my_wc_t *wc, size_t limit, my_wc_t code) {
2791 size_t i;
2792
1/2
✓ Branch 0 taken 1789348 times.
✗ Branch 1 not taken.
1789348 for (i = 0; i < limit; i++) {
2793
2/2
✓ Branch 0 taken 1600497 times.
✓ Branch 1 taken 188851 times.
1789348 if (wc[i] == 0) {
2794 1600497 wc[i] = code;
2795 1600497 return 1;
2796 }
2797 }
2798 return 0;
2799 }
2800
2801 /**
2802 Initialize collation customization rule
2803
2804 @param r Rule
2805 */
2806
2807 267272 static void my_coll_rule_reset(MY_COLL_RULE *r) { memset(r, 0, sizeof(*r)); }
2808
2809 /*
2810 Shift methods:
2811 Simple: "&B < C" : weight('C') = weight('B') + 1
2812 Expand: weight('C') = { weight('B'), weight(last_non_ignorable) + 1 }
2813 */
2814 typedef enum {
2815 my_shift_method_simple = 0,
2816 my_shift_method_expand
2817 } my_coll_shift_method;
2818
2819 struct MY_COLL_RULES {
2820 MY_UCA_INFO *uca; /* Unicode weight data */
2821 size_t nrules; /* Number of rules in the rule array */
2822 size_t mrules; /* Number of allocated rules */
2823 MY_COLL_RULE *rule; /* Rule array */
2824 MY_CHARSET_LOADER *loader;
2825 my_coll_shift_method shift_after_method;
2826 };
2827
2828 /**
2829 Realloc rule array to a new size.
2830 Reallocate memory for 128 additional rules at once,
2831 to reduce the number of reallocs, which is important
2832 for long tailorings (e.g. for East Asian collations).
2833
2834 @param rules Rule container
2835 @param n new number of rules
2836
2837 @return 0 on success, -1 on error.
2838 */
2839
2840 1146168 static int my_coll_rules_realloc(MY_COLL_RULES *rules, size_t n) {
2841
3/4
✓ Branch 0 taken 28144 times.
✓ Branch 1 taken 1118024 times.
✓ Branch 2 taken 1146168 times.
✗ Branch 3 not taken.
1174312 if (rules->nrules < rules->mrules ||
2842
1/2
✓ Branch 0 taken 28144 times.
✗ Branch 1 not taken.
28144 (rules->rule = static_cast<MY_COLL_RULE *>(rules->loader->mem_realloc(
2843 28144 rules->rule, sizeof(MY_COLL_RULE) * (rules->mrules = n + 128)))))
2844 1146168 return 0;
2845 return -1;
2846 }
2847
2848 /**
2849 Append one new rule to a rule array
2850
2851 @param rules Rule container
2852 @param rule New rule to add
2853
2854 @return 0 on success, -1 on error.
2855 */
2856
2857 1146168 static int my_coll_rules_add(MY_COLL_RULES *rules, MY_COLL_RULE *rule) {
2858
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1146168 times.
1146168 if (my_coll_rules_realloc(rules, rules->nrules + 1)) return -1;
2859 1146168 rules->rule[rules->nrules++] = rule[0];
2860 1146168 return 0;
2861 }
2862
2863 /**
2864 Apply difference at level
2865
2866 @param r Rule
2867 @param level Level (0,1,2,3,4)
2868 */
2869
2870 973618 static void my_coll_rule_shift_at_level(MY_COLL_RULE *r, int level) {
2871
5/6
✓ Branch 0 taken 34184 times.
✓ Branch 1 taken 310396 times.
✓ Branch 2 taken 84894 times.
✓ Branch 3 taken 300849 times.
✓ Branch 4 taken 243295 times.
✗ Branch 5 not taken.
973618 switch (level) {
2872 34184 case 4: /* Quaternary difference */
2873 34184 r->diff[3]++;
2874 34184 break;
2875 310396 case 3: /* Tertiary difference */
2876 310396 r->diff[2]++;
2877 310396 r->diff[3] = 0;
2878 310396 break;
2879 84894 case 2: /* Secondary difference */
2880 84894 r->diff[1]++;
2881 84894 r->diff[2] = r->diff[3] = 0;
2882 84894 break;
2883 300849 case 1: /* Primary difference */
2884 300849 r->diff[0]++;
2885 300849 r->diff[1] = r->diff[2] = r->diff[3] = 0;
2886 300849 break;
2887 243295 case 0:
2888 /* Do nothing for '=': use the previous offsets for all levels */
2889 243295 break;
2890 default:
2891 assert(0);
2892 }
2893 973618 }
2894
2895 struct MY_COLL_RULE_PARSER {
2896 MY_COLL_LEXEM tok[2]; /* Current token and next token for look-ahead */
2897 MY_COLL_RULE rule; /* Currently parsed rule */
2898 MY_COLL_RULES *rules; /* Rule list pointer */
2899 char errstr[128]; /* Error message */
2900 };
2901
2902 /**
2903 Current parser token
2904
2905 @param p Collation customization parser
2906
2907 @return Pointer to the current token
2908 */
2909
2910 14874854 static MY_COLL_LEXEM *my_coll_parser_curr(MY_COLL_RULE_PARSER *p) {
2911 14874854 return &p->tok[0];
2912 }
2913
2914 /**
2915 Next parser token, to look ahead.
2916
2917 @param p Collation customization parser
2918
2919 @return Pointer to the next token
2920 */
2921
2922 6310578 static MY_COLL_LEXEM *my_coll_parser_next(MY_COLL_RULE_PARSER *p) {
2923 6310578 return &p->tok[1];
2924 }
2925
2926 /**
2927 Scan one token from the input stream
2928
2929 @param p Collation customization parser
2930
2931 @return Always 1, so that the call can be used conveniently in logical expressions.
2932 */
2933 3128909 static int my_coll_parser_scan(MY_COLL_RULE_PARSER *p) {
2934 3128909 my_coll_parser_curr(p)[0] = my_coll_parser_next(p)[0];
2935 3128909 my_coll_lexem_next(my_coll_parser_next(p));
2936 3128909 return 1;
2937 }
2938
2939 /**
2940 Initialize collation customization parser
2941
2942 @param p Collation customization parser
2943 @param rules Where to store rules
2944 @param str Beginning of a collation customization string
2945 @param str_end End of the collation customizations string
2946 */
2947
2948 26380 static void my_coll_parser_init(MY_COLL_RULE_PARSER *p, MY_COLL_RULES *rules,
2949 const char *str, const char *str_end) {
2950 /*
2951 Initialize parser to the input buffer and scan two tokens,
2952 to make the current token and the next token known.
2953 */
2954 26380 memset(p, 0, sizeof(*p));
2955 26380 p->rules = rules;
2956 26380 p->errstr[0] = '\0';
2957 26380 my_coll_lexem_init(my_coll_parser_curr(p), str, str_end);
2958 26380 my_coll_lexem_next(my_coll_parser_curr(p));
2959 26380 my_coll_parser_next(p)[0] = my_coll_parser_curr(p)[0];
2960 26380 my_coll_lexem_next(my_coll_parser_next(p));
2961 26380 }
2962
2963 /**
2964 Display error when an unexpected token found
2965
2966 @param p Collation customization parser
2967 @param term Which lexem was expected
2968
2969 @return 0, to use in "return" and boolean expressions.
2970 */
2971
2972 1 static int my_coll_parser_expected_error(MY_COLL_RULE_PARSER *p,
2973 my_coll_lexem_num term) {
2974 1 snprintf(p->errstr, sizeof(p->errstr), "%s expected",
2975 my_coll_lexem_num_to_str(term));
2976 1 return 0;
2977 }
2978
2979 /**
2980 Display error when a too long character sequence is met
2981
2982 @param p Collation customization parser
2983 @param name Which kind of sequence: contraction, expansion, etc.
2984
2985 @return 0, to use in "return" and boolean expressions.
2986 */
2987
2988 static int my_coll_parser_too_long_error(MY_COLL_RULE_PARSER *p,
2989 const char *name) {
2990 snprintf(p->errstr, sizeof(p->errstr), "%s is too long", name);
2991 return 0;
2992 }
2993
2994 /**
2995 Scan the given lexem from input stream, or display "expected" error.
2996
2997 @param p Collation customization parser
2998 @param term Which lexem is expected.
2999
3000 @retval 0 if the required term was not found.
3001 @retval 1 if the required term was found.
3002 */
3003 1772045 static int my_coll_parser_scan_term(MY_COLL_RULE_PARSER *p,
3004 my_coll_lexem_num term) {
3005
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772045 times.
1772045 if (my_coll_parser_curr(p)->term != term)
3006 return my_coll_parser_expected_error(p, term);
3007 1772045 return my_coll_parser_scan(p);
3008 }
3009
3010 /*
3011 In the following code we have a few functions to parse
3012 various collation customization non-terminal symbols.
3013 Unlike our usual coding convention, they return
3014 - 0 on "error" (when the rule was not scanned) and
3015 - 1 on "success"(when the rule was scanned).
3016 This is done intentionally to make body of the functions look easier
3017 and repeat the grammar of the rules in straightforward manner.
3018 For example:
3019
3020 // <x> ::= <y> | <z>
3021 int parse_x() { return parse_y() || parse_z(); }
3022
3023 // <x> ::= <y> <z>
3024 int parse_x() { return parse_y() && parse_z(); }
3025
3026 Using 1 on "not found" and 0 on "found" in the parser code would
3027 make the code more error prone and harder to read because
3028 of having to use inverse boolean logic.
3029 */
3030
3031 /**
3032 Scan a collation setting in brackets, for example UCA version.
3033
3034 @param p Collation customization parser
3035
3036 @retval 0 if setting was not scanned.
3037 @retval 1 if setting was scanned.
3038 */
3039
3040 11 static int my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p) {
3041 11 MY_COLL_RULES *rules = p->rules;
3042 11 MY_COLL_LEXEM *lexem = my_coll_parser_curr(p);
3043
3044
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 6 times.
11 if (!lex_cmp(lexem, STRING_WITH_LEN("[version 4.0.0]"))) {
3045 5 rules->uca = &my_uca_v400;
3046
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
6 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[version 5.2.0]"))) {
3047 2 rules->uca = &my_uca_v520;
3048
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method expand]"))) {
3049 2 rules->shift_after_method = my_shift_method_expand;
3050
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method simple]"))) {
3051 rules->shift_after_method = my_shift_method_simple;
3052 } else {
3053 2 return 0;
3054 }
3055 9 return my_coll_parser_scan(p);
3056 }
3057
3058 /**
3059 Scan multiple collation settings
3060
3061 @param p Collation customization parser
3062
3063 @retval 0 if no settings were scanned.
3064 @retval 1 if one or more settings were scanned.
3065 */
3066
3067 26380 static int my_coll_parser_scan_settings(MY_COLL_RULE_PARSER *p) {
3068 /* Scan collation setting or special purpose command */
3069
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 26378 times.
26389 while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) {
3070
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
11 if (!my_coll_parser_scan_setting(p)) return 0;
3071 }
3072 26378 return 1;
3073 }
3074
3075 /**
3076 Scan [before xxx] reset option
3077
3078 @param p Collation customization parser
3079
3080 @retval 0 if reset option was not scanned.
3081 @retval 1 if reset option was scanned.
3082 */
3083
3084 23658 static int my_coll_parser_scan_reset_before(MY_COLL_RULE_PARSER *p) {
3085 23658 MY_COLL_LEXEM *lexem = my_coll_parser_curr(p);
3086
6/6
✓ Branch 0 taken 23614 times.
✓ Branch 1 taken 44 times.
✓ Branch 2 taken 6358 times.
✓ Branch 3 taken 17256 times.
✓ Branch 4 taken 6402 times.
✓ Branch 5 taken 17256 times.
47272 if (!lex_cmp(lexem, STRING_WITH_LEN("[before primary]")) ||
3087 23614 !lex_cmp(lexem, STRING_WITH_LEN("[before 1]"))) {
3088 6402 p->rule.before_level = 1;
3089
6/6
✓ Branch 0 taken 17252 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1022 times.
✓ Branch 3 taken 16230 times.
✓ Branch 4 taken 1026 times.
✓ Branch 5 taken 16230 times.
34508 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before secondary]")) ||
3090 17252 !lex_cmp(lexem, STRING_WITH_LEN("[before 2]"))) {
3091 1026 p->rule.before_level = 2;
3092
6/6
✓ Branch 0 taken 16226 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 16170 times.
✓ Branch 3 taken 56 times.
✓ Branch 4 taken 16174 times.
✓ Branch 5 taken 56 times.
32456 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before tertiary]")) ||
3093 16226 !lex_cmp(lexem, STRING_WITH_LEN("[before 3]"))) {
3094 16174 p->rule.before_level = 3;
3095
5/6
✓ Branch 0 taken 52 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 52 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 52 times.
108 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before quaternary]")) ||
3096 52 !lex_cmp(lexem, STRING_WITH_LEN("[before 4]"))) {
3097 4 p->rule.before_level = 4;
3098 } else {
3099 52 p->rule.before_level = 0;
3100 52 return 0; /* Don't scan the next character */
3101 }
3102 23606 return my_coll_parser_scan(p);
3103 }
3104
3105 /**
3106 Scan logical position and add to the wide string.
3107
3108 @param p Collation customization parser
3109 @param pwc Wide string to add code to
3110 @param limit The result string cannot be longer than 'limit' characters
3111
3112 @retval 0 if logical position was not scanned.
3113 @retval 1 if logical position was scanned.
3114 */
3115
3116 70 static int my_coll_parser_scan_logical_position(MY_COLL_RULE_PARSER *p,
3117 my_wc_t *pwc, size_t limit) {
3118 70 MY_COLL_RULES *rules = p->rules;
3119 70 MY_COLL_LEXEM *lexem = my_coll_parser_curr(p);
3120
3121
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 62 times.
70 if (!lex_cmp(lexem, STRING_WITH_LEN("[first non-ignorable]")))
3122 8 lexem->code = rules->uca->first_non_ignorable;
3123
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 50 times.
62 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last non-ignorable]")))
3124 12 lexem->code = rules->uca->last_non_ignorable;
3125
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 46 times.
50 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first primary ignorable]")))
3126 4 lexem->code = rules->uca->first_primary_ignorable;
3127
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 42 times.
46 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last primary ignorable]")))
3128 4 lexem->code = rules->uca->last_primary_ignorable;
3129
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
42 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first secondary ignorable]")))
3130 6 lexem->code = rules->uca->first_secondary_ignorable;
3131
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 32 times.
36 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last secondary ignorable]")))
3132 4 lexem->code = rules->uca->last_secondary_ignorable;
3133
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 28 times.
32 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first tertiary ignorable]")))
3134 4 lexem->code = rules->uca->first_tertiary_ignorable;
3135
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.
28 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last tertiary ignorable]")))
3136 4 lexem->code = rules->uca->last_tertiary_ignorable;
3137
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 20 times.
24 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first trailing]")))
3138 4 lexem->code = rules->uca->first_trailing;
3139
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
20 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last trailing]")))
3140 4 lexem->code = rules->uca->last_trailing;
3141
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first variable]")))
3142 8 lexem->code = rules->uca->first_variable;
3143
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last variable]")))
3144 8 lexem->code = rules->uca->last_variable;
3145 else
3146 return 0; /* Don't scan the next token */
3147
3148
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
70 if (!my_coll_rule_expand(pwc, limit, lexem->code)) {
3149 /*
3150 Logical position can not be in a contraction,
3151 so the above call should never fail.
3152 Let's assert in debug version and print
3153 a nice error message in production version.
3154 */
3155 assert(0);
3156 return my_coll_parser_too_long_error(p, "Logical position");
3157 }
3158 70 return my_coll_parser_scan(p);
3159 }
3160
3161 /**
3162 Scan character list
3163
3164 @<character list@> ::= CHAR [ CHAR... ]
3165
3166 @param p Collation customization parser
3167 @param pwc Character string to add code to
3168 @param limit The result string cannot be longer than 'limit' characters
3169 @param name E.g. "contraction", "expansion"
3170
3171 @retval 0 if character sequence was not scanned.
3172 @retval 1 if character sequence was scanned.
3173 */
3174
3175 1478396 static int my_coll_parser_scan_character_list(MY_COLL_RULE_PARSER *p,
3176 my_wc_t *pwc, size_t limit,
3177 const char *name) {
3178
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1478396 times.
1478396 if (my_coll_parser_curr(p)->term != MY_COLL_LEXEM_CHAR)
3179 return my_coll_parser_expected_error(p, MY_COLL_LEXEM_CHAR);
3180
3181
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1478396 times.
1478396 if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code))
3182 return my_coll_parser_too_long_error(p, name);
3183
3184
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1478396 times.
1478396 if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_CHAR)) return 0;
3185
3186
2/2
✓ Branch 0 taken 121985 times.
✓ Branch 1 taken 1478396 times.
1600381 while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CHAR) {
3187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 121985 times.
121985 if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code))
3188 return my_coll_parser_too_long_error(p, name);
3189 121985 my_coll_parser_scan(p);
3190 }
3191 1478396 return 1;
3192 }
3193
3194 /**
3195 Scan reset sequence
3196
3197 @<reset sequence@> ::=
3198 [ @<reset before option@> ] @<character list@>
3199 | [ @<reset before option@> ] @<logical reset position@>
3200
3201 @param p Collation customization parser
3202
3203 @retval 0 if reset sequence was not scanned.
3204 @retval 1 if reset sequence was scanned.
3205 */
3206
3207 267272 static int my_coll_parser_scan_reset_sequence(MY_COLL_RULE_PARSER *p) {
3208 267272 my_coll_rule_reset(&p->rule);
3209
3210 /* Scan "[before x]" option, if exists */
3211
2/2
✓ Branch 0 taken 23658 times.
✓ Branch 1 taken 243614 times.
267272 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION)
3212 23658 my_coll_parser_scan_reset_before(p);
3213
3214 /* Try logical reset position */
3215
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 267202 times.
267272 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) {
3216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
70 if (!my_coll_parser_scan_logical_position(p, p->rule.base, 1)) return 0;
3217 } else {
3218 /* Scan single reset character or expansion */
3219
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 267202 times.
267202 if (!my_coll_parser_scan_character_list(p, p->rule.base,
3220 MY_UCA_MAX_EXPANSION, "Expansion"))
3221 return 0;
3222 }
3223
3224
2/2
✓ Branch 0 taken 267268 times.
✓ Branch 1 taken 4 times.
267272 if ((p->rules->shift_after_method == my_shift_method_expand ||
3225
2/2
✓ Branch 0 taken 6400 times.
✓ Branch 1 taken 260868 times.
267268 p->rule.before_level == 1) &&
3226
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 6358 times.
6404 p->rules->uca->version < UCA_V900) /* Apply "before primary" option */
3227 {
3228 /*
3229 Suppose we have this rule: &B[before primary] < C
3230 i.e. we need to put C before B, but after A, so
3231 the result order is: A < C < B.
3232
3233 Let primary weight of B be [BBBB].
3234
3235 We cannot just use [BBBB-1] as weight for C:
3236 DUCET does not have enough unused weights between any two characters,
3237 so using [BBBB-1] will likely make C equal to the previous character,
3238 which is A, so we'll get this order instead of the desired: A = C < B.
3239
3240 To guarantee that C is sorted after A, we'll use expansion
3241 with a kind of "biggest possible character".
3242 As "biggest possible character" we'll use "last_non_ignorable":
3243
3244 We'll compose weight for C as: [BBBB-1][MMMM+1]
3245 where [MMMM] is weight for "last_non_ignorable".
3246
3247 We also do the same trick for "reset after" if the collation
3248 option says so. E.g. for the rules "&B < C", weight for
3249 C will be calculated as: [BBBB][MMMM+1]
3250
3251 At this point we only need to store codepoints
3252 'B' and 'last_non_ignorable'. Actual weights for 'C'
3253 will be calculated according to the above formula later,
3254 in create_tailoring().
3255 */
3256
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
46 if (!my_coll_rule_expand(p->rule.base, MY_UCA_MAX_EXPANSION,
3257 46 p->rules->uca->last_non_ignorable))
3258 return my_coll_parser_too_long_error(p, "Expansion");
3259 }
3260 267272 return 1;
3261 }
3262
3263 /**
3264 Scan shift sequence
3265
3266 @<shift sequence@> ::=
3267 @<character list@> [ / @<character list@> ]
3268 | @<character list@> [ | @<character list@> ]
3269
3270 @param p Collation customization parser
3271
3272 @retval 0 if shift sequence was not scanned.
3273 @retval 1 if shift sequence was scanned.
3274 */
3275
3276 973618 static int my_coll_parser_scan_shift_sequence(MY_COLL_RULE_PARSER *p) {
3277 MY_COLL_RULE before_extend;
3278
3279 973618 memset(&p->rule.curr, 0, sizeof(p->rule.curr));
3280
3281 /* Scan single shift character or contraction */
3282
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 973618 times.
973618 if (!my_coll_parser_scan_character_list(
3283
1/2
✓ Branch 0 taken 973618 times.
✗ Branch 1 not taken.
973618 p, p->rule.curr, MY_UCA_MAX_CONTRACTION, "Contraction"))
3284 return 0;
3285
3286 973618 before_extend = p->rule; /* Remember the part before "/" */
3287
3288 /* Append the part after "/" as expansion */
3289
2/2
✓ Branch 0 taken 10588 times.
✓ Branch 1 taken 963030 times.
973618 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) {
3290
1/2
✓ Branch 0 taken 10588 times.
✗ Branch 1 not taken.
10588 my_coll_parser_scan(p);
3291
2/4
✓ Branch 0 taken 10588 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10588 times.
10588 if (!my_coll_parser_scan_character_list(p, p->rule.base,
3292 MY_UCA_MAX_EXPANSION, "Expansion"))
3293 return 0;
3294
2/2
✓ Branch 0 taken 186122 times.
✓ Branch 1 taken 776908 times.
963030 } else if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CONTEXT) {
3295 /*
3296 We support 2-character long context sequences only:
3297 one character is the previous context, plus the current character.
3298 It's OK as Unicode's CLDR does not have longer examples.
3299 */
3300
1/2
✓ Branch 0 taken 186122 times.
✗ Branch 1 not taken.
186122 my_coll_parser_scan(p);
3301 186122 p->rule.with_context = true;
3302
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 186122 times.
186122 if (!my_coll_parser_scan_character_list(
3303
1/2
✓ Branch 0 taken 186122 times.
✗ Branch 1 not taken.
186122 p, p->rule.curr + 1, MY_UCA_MAX_EXPANSION - 1, "context"))
3304 return 0;
3305 /*
3306 It might be CONTEXT followed by EXPANSION. For example, Japanese
3307 collation has one rule defined as:
3308 "&[before 3]へ<<<へ|ゝ=べ|ゝ=へ|ゞ/\u3099"
3309 The part of "へ|ゞ/\u3099" is CONTEXT ('|') followed by EXPANSION ('/').
3310 */
3311
2/2
✓ Branch 0 taken 40866 times.
✓ Branch 1 taken 145256 times.
186122 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) {
3312
1/2
✓ Branch 0 taken 40866 times.
✗ Branch 1 not taken.
40866 my_coll_parser_scan(p);
3313 40866 size_t len = my_wstrnlen(p->rule.base, MY_UCA_MAX_EXPANSION);
3314
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 40866 times.
40866 if (!my_coll_parser_scan_character_list(
3315
1/2
✓ Branch 0 taken 40866 times.
✗ Branch 1 not taken.
40866 p, p->rule.base + len, MY_UCA_MAX_EXPANSION - len, "Expansion"))
3316 return 0;
3317 }
3318 }
3319
3320 /* Add rule to the rule list */
3321
2/4
✓ Branch 0 taken 973618 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 973618 times.
973618 if (my_coll_rules_add(p->rules, &p->rule)) return 0;
3322
3323 973618 p->rule = before_extend; /* Restore to the state before "/" */
3324
3325 973618 return 1;
3326 }
3327
3328 /**
3329 Scan shift operator
3330
3331 @<shift@> ::= < | << | <<< | <<<< | =
3332
3333 @param p Collation customization parser
3334
3335 @retval 0 if shift operator was not scanned.
3336 @retval 1 if shift operator was scanned.
3337 */
3338 1240890 static int my_coll_parser_scan_shift(MY_COLL_RULE_PARSER *p) {
3339
2/2
✓ Branch 0 taken 973618 times.
✓ Branch 1 taken 267272 times.
1240890 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_SHIFT) {
3340 973618 my_coll_rule_shift_at_level(&p->rule, my_coll_parser_curr(p)->diff);
3341 973618 return my_coll_parser_scan(p);
3342 }
3343 267272 return 0;
3344 }
3345
3346 /**
3347 Scan one rule: reset followed by a number of shifts
3348
3349 @<rule@> ::=
3350 & @<reset sequence@>
3351 @<shift@> @<shift sequence@>
3352 [ { @<shift@> @<shift sequence@> }... ]
3353
3354 @param p Collation customization parser
3355
3356 @retval 0 if rule was not scanned.
3357 @retval 1 if rule was scanned.
3358 */
3359 267272 static int my_coll_parser_scan_rule(MY_COLL_RULE_PARSER *p) {
3360
2/4
✓ Branch 0 taken 267272 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 267272 times.
534544 if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_RESET) ||
3361
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 267272 times.
267272 !my_coll_parser_scan_reset_sequence(p))
3362 return 0;
3363
3364 /* Scan the first required shift command */
3365
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 267271 times.
267272 if (!my_coll_parser_scan_shift(p))
3366 1 return my_coll_parser_expected_error(p, MY_COLL_LEXEM_SHIFT);
3367
3368 /* Scan the first shift sequence */
3369
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 267271 times.
267271 if (!my_coll_parser_scan_shift_sequence(p)) return 0;
3370
3371 /* Scan subsequent shift rules */
3372
2/2
✓ Branch 0 taken 706347 times.
✓ Branch 1 taken 267271 times.
973618 while (my_coll_parser_scan_shift(p)) {
3373
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 706347 times.
706347 if (!my_coll_parser_scan_shift_sequence(p)) return 0;
3374 }
3375 267271 return 1;
3376 }
3377
3378 /**
3379 Scan collation customization: settings followed by rules
3380
3381 @<collation customization@> ::=
3382 [ @<setting@> ... ]
3383 [ @<rule@>... ]
3384
3385 @param p Collation customization parser
3386
3387 @retval 0 if collation customization expression was not scanned.
3388 @retval 1 if collation customization expression was scanned.
3389 */
3390
3391 26380 static int my_coll_parser_exec(MY_COLL_RULE_PARSER *p) {
3392
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 26378 times.
26380 if (!my_coll_parser_scan_settings(p)) return 0;
3393
3394
2/2
✓ Branch 0 taken 267272 times.
✓ Branch 1 taken 26377 times.
293649 while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_RESET) {
3395
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 267271 times.
267272 if (!my_coll_parser_scan_rule(p)) return 0;
3396 }
3397 /* Make sure no unparsed input data left */
3398 26377 return my_coll_parser_scan_term(p, MY_COLL_LEXEM_EOF);
3399 }
3400
3401 /*
3402 Collation language syntax parser.
3403 Uses lexical parser.
3404
3405 @param rules Collation rule list to load to.
3406 @param str A string with collation customization.
3407 @param str_end End of the string.
3408 @param col_name Collation name
3409
3410 @retval 0 on success
3411 @retval 1 on error
3412 */
3413
3414 26380 static int my_coll_rule_parse(MY_COLL_RULES *rules, const char *str,
3415 const char *str_end, const char *col_name) {
3416 MY_COLL_RULE_PARSER p;
3417
3418
1/2
✓ Branch 0 taken 26380 times.
✗ Branch 1 not taken.
26380 my_coll_parser_init(&p, rules, str, str_end);
3419
3420
3/4
✓ Branch 0 taken 26380 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 26377 times.
26380 if (!my_coll_parser_exec(&p)) {
3421 3 rules->loader->errcode = EE_COLLATION_PARSER_ERROR;
3422
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 my_coll_lexem_print_error(my_coll_parser_curr(&p), rules->loader->errarg,
3423 sizeof(rules->loader->errarg) - 1, p.errstr,
3424 col_name);
3425 3 return 1;
3426 }
3427 26377 return 0;
3428 }
3429
3430 6960 static void spread_case_mask(uint16 *to, size_t to_stride,
3431 size_t tailored_ce_cnt, uint16 case_mask) {
3432
2/2
✓ Branch 0 taken 17110 times.
✓ Branch 1 taken 6960 times.
24070 for (size_t i = 0; i < tailored_ce_cnt; ++i) {
3433 17110 uint16 *case_weight = &to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride];
3434
2/2
✓ Branch 0 taken 7540 times.
✓ Branch 1 taken 9570 times.
17110 if (*case_weight > CASE_FIRST_UPPER_MASK)
3435 7540 case_mask = *case_weight & 0xFF00;
3436
2/2
✓ Branch 0 taken 6235 times.
✓ Branch 1 taken 3335 times.
9570 else if (*case_weight)
3437 6235 *case_weight |= case_mask;
3438 }
3439 6960 }
3440
3441 /*
3442 If the collation is marked as [caseFirst upper], move all of the weights
3443 around to accommodate that. Only tailored weights are changed; for non-tailored
3444 weights, we do it on-the-fly in uca_scanner_900::apply_case_first().
3445
3446 [caseFirst upper] is a directive that says that case should override all
3447 other tertiary case concerns (in a sense, a “level 2.5”), and furthermore,
3448 that uppercase should come before lowercase. (Normally lowercase sorts
3449 before uppercase.) It is currently only used in the Danish collation.
3450
3451 This is done by looking at the tertiary weight, inferring the case from it,
3452 and then using the upper bits (which are normally unused) to signal the case.
3453 The algorithm is detailed in Unicode TR35, section 3.14, although we don't
3454 seem to follow it exactly.
3455 */
3456 1146168 static void change_weight_if_case_first(CHARSET_INFO *cs,
3457 const MY_UCA_INFO *dst, MY_COLL_RULE *r,
3458 uint16 *to, size_t to_stride,
3459 size_t curr_len,
3460 size_t tailored_ce_cnt) {
3461 /* We only need to implement [caseFirst upper] right now. */
3462
4/4
✓ Branch 0 taken 472986 times.
✓ Branch 1 taken 673182 times.
✓ Branch 2 taken 6960 times.
✓ Branch 3 taken 466026 times.
1146168 if (!(cs->coll_param && cs->coll_param->case_first == CASE_FIRST_UPPER &&
3463
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6960 times.
6960 cs->levels_for_compare == 3))
3464 1139208 return;
3465
3466
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6960 times.
6960 assert(cs->uca->version == UCA_V900);
3467
3468 // How many CEs this character has with non-ignorable primary weight.
3469 6960 int tailored_pri_cnt = 0;
3470 6960 int origin_pri_cnt = 0;
3471
2/2
✓ Branch 0 taken 11165 times.
✓ Branch 1 taken 3915 times.
15080 for (size_t i = 0; i < tailored_ce_cnt; ++i) {
3472 /*
3473 If rule A has already applied a case weight change, and we have rule B
3474 which is inherited from A, apply the same case weight change on the rest
3475 of rule B and return.
3476 */
3477
2/2
✓ Branch 0 taken 3045 times.
✓ Branch 1 taken 8120 times.
11165 if (to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] > CASE_FIRST_UPPER_MASK) {
3478 3045 spread_case_mask(to, to_stride, tailored_ce_cnt, /*case_mask=*/0);
3479 3045 return;
3480 }
3481
2/2
✓ Branch 0 taken 6670 times.
✓ Branch 1 taken 1450 times.
8120 if (to[i * MY_UCA_900_CE_SIZE * to_stride]) tailored_pri_cnt++;
3482 }
3483
3/4
✓ Branch 0 taken 1450 times.
✓ Branch 1 taken 2465 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1450 times.
3915 if (r->before_level == 1 || r->diff[0]) tailored_pri_cnt--;
3484
3485 // Use the DUCET weight to detect the character's case.
3486 3915 MY_UCA_INFO *src = &my_uca_v900;
3487 3915 int changed_ce = 0;
3488
3489 3915 my_wc_t *curr = r->curr;
3490
2/2
✓ Branch 0 taken 4350 times.
✓ Branch 1 taken 3915 times.
8265 for (size_t i = 0; i < curr_len; ++i) {
3491 4350 const uint16 *from = my_char_weight_addr_900(src, *curr);
3492 4350 uint page = *curr >> 8;
3493 4350 uint code = *curr & 0xFF;
3494 4350 curr++;
3495 4350 int ce_cnt =
3496
1/2
✓ Branch 0 taken 4350 times.
✗ Branch 1 not taken.
4350 src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0;
3497
2/2
✓ Branch 0 taken 8120 times.
✓ Branch 1 taken 4350 times.
12470 for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) {
3498
2/2
✓ Branch 0 taken 4930 times.
✓ Branch 1 taken 3190 times.
8120 if (from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS]) origin_pri_cnt++;
3499 }
3500 }
3501 3915 int case_to_copy = 0;
3502
2/2
✓ Branch 0 taken 2900 times.
✓ Branch 1 taken 1015 times.
3915 if (origin_pri_cnt <= tailored_pri_cnt)
3503 2900 case_to_copy = origin_pri_cnt;
3504 else
3505 1015 case_to_copy = tailored_pri_cnt - 1;
3506 3915 int upper_cnt = 0;
3507 3915 int lower_cnt = 0;
3508 3915 curr = r->curr;
3509 3915 uint16 case_mask = 0;
3510
2/2
✓ Branch 0 taken 4350 times.
✓ Branch 1 taken 3915 times.
8265 for (size_t curr_ind = 0; curr_ind < curr_len; ++curr_ind) {
3511 4350 const uint16 *from = my_char_weight_addr_900(src, *curr);
3512 4350 uint page = *curr >> 8;
3513 4350 uint code = *curr & 0xFF;
3514 4350 curr++;
3515 4350 int ce_cnt =
3516
1/2
✓ Branch 0 taken 4350 times.
✗ Branch 1 not taken.
4350 src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0;
3517 4350 changed_ce = 0;
3518
2/2
✓ Branch 0 taken 8120 times.
✓ Branch 1 taken 4350 times.
12470 for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) {
3519 8120 uint16 primary_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS];
3520
2/2
✓ Branch 0 taken 4930 times.
✓ Branch 1 taken 3190 times.
8120 if (primary_weight) {
3521 4930 uint16 case_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS +
3522 4930 2 * UCA900_DISTANCE_BETWEEN_LEVELS];
3523 4930 uint16 *ce_to = nullptr;
3524
2/2
✓ Branch 0 taken 2465 times.
✓ Branch 1 taken 2465 times.
4930 if (is_tertiary_weight_upper_case(case_weight)) {
3525
2/2
✓ Branch 0 taken 1015 times.
✓ Branch 1 taken 1450 times.
2465 if (!case_to_copy)
3526 1015 upper_cnt++;
3527 else
3528 1450 case_mask = CASE_FIRST_UPPER_MASK;
3529 } else {
3530
2/2
✓ Branch 0 taken 1015 times.
✓ Branch 1 taken 1450 times.
2465 if (!case_to_copy)
3531 1015 lower_cnt++;
3532 else
3533 1450 case_mask = CASE_FIRST_LOWER_MASK;
3534 }
3535
2/2
✓ Branch 0 taken 2900 times.
✓ Branch 1 taken 2030 times.
4930 if (case_to_copy) {
3536 do {
3537 2900 ce_to = to + changed_ce * MY_UCA_900_CE_SIZE * to_stride;
3538 2900 changed_ce++;
3539
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2900 times.
2900 } while (*ce_to == 0);
3540 2900 ce_to[2 * to_stride] |= case_mask;
3541 2900 case_to_copy--;
3542 }
3543 }
3544 }
3545 }
3546
2/2
✓ Branch 0 taken 2900 times.
✓ Branch 1 taken 1015 times.
3915 if (origin_pri_cnt <= tailored_pri_cnt) {
3547
2/2
✓ Branch 0 taken 290 times.
✓ Branch 1 taken 2900 times.
3190 for (int i = origin_pri_cnt; i < tailored_pri_cnt; ++i) {
3548 290 const int offset = changed_ce * MY_UCA_900_CE_SIZE * to_stride;
3549
2/4
✓ Branch 0 taken 290 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 290 times.
✗ Branch 3 not taken.
290 if (to[offset] && to[offset] < dst->extra_ce_pri_base)
3550 290 to[offset + 2 * to_stride] = 0;
3551 }
3552 } else {
3553
4/4
✓ Branch 0 taken 580 times.
✓ Branch 1 taken 435 times.
✓ Branch 2 taken 145 times.
✓ Branch 3 taken 435 times.
1015 if (upper_cnt && lower_cnt)
3554 145 case_mask = CASE_FIRST_MIXED_MASK;
3555
3/4
✓ Branch 0 taken 435 times.
✓ Branch 1 taken 435 times.
✓ Branch 2 taken 435 times.
✗ Branch 3 not taken.
870 else if (upper_cnt && !lower_cnt)
3556 435 case_mask = CASE_FIRST_UPPER_MASK;
3557 else
3558 435 case_mask = CASE_FIRST_LOWER_MASK;
3559 1015 bool skipped_extra_ce = false;
3560
2/2
✓ Branch 0 taken 2030 times.
✓ Branch 1 taken 1015 times.
3045 for (int i = tailored_ce_cnt - 1; i >= 0; --i) {
3561 2030 int offset = i * MY_UCA_900_CE_SIZE * to_stride;
3562
3/4
✓ Branch 0 taken 2030 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1015 times.
✓ Branch 3 taken 1015 times.
2030 if (to[offset] && to[offset] < dst->extra_ce_pri_base) {
3563
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 1015 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1015 times.
✗ Branch 5 not taken.
1015 if ((r->before_level == 1 || r->diff[0]) && !skipped_extra_ce) {
3564 1015 skipped_extra_ce = true;
3565 1015 continue;
3566 }
3567 to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] |= case_mask;
3568 break;
3569 }
3570 }
3571 }
3572 3915 spread_case_mask(to, to_stride, tailored_ce_cnt, case_mask);
3573 }
3574
3575 670139 static size_t my_char_weight_put_900(MY_UCA_INFO *dst, uint16 *to,
3576 size_t to_stride, size_t to_length,
3577 uint16 *to_num_ce,
3578 const MY_COLL_RULE *rule,
3579 size_t base_len) {
3580 size_t count;
3581 670139 int total_ce_cnt = 0;
3582
3583 670139 const my_wc_t *base = rule->base;
3584
2/2
✓ Branch 0 taken 944298 times.
✓ Branch 1 taken 670139 times.
1614437 for (count = 0; base_len;) {
3585 944298 const uint16 *from = nullptr;
3586 944298 size_t from_stride = 0;
3587 944298 int ce_cnt = 0;
3588
3589
2/2
✓ Branch 0 taken 334944 times.
✓ Branch 1 taken 927184 times.
1262128 for (size_t chlen = base_len; chlen > 1; chlen--) {
3590
3/4
✓ Branch 0 taken 334944 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 17114 times.
✓ Branch 3 taken 317830 times.
334944 if ((from = my_uca_contraction_weight(dst->contraction_nodes, base,
3591 chlen))) {
3592 17114 from_stride = 1;
3593 17114 base += chlen;
3594 17114 base_len -= chlen;
3595 17114 ce_cnt = *(from + MY_UCA_MAX_WEIGHT_SIZE - 1);
3596 17114 break;
3597 }
3598 }
3599
3600
2/2
✓ Branch 0 taken 927184 times.
✓ Branch 1 taken 17114 times.
944298 if (!from) {
3601 927184 uint page = *base >> 8;
3602 927184 uint code = *base & 0xFF;
3603 927184 base++;
3604 927184 base_len--;
3605
1/2
✓ Branch 0 taken 927184 times.
✗ Branch 1 not taken.
927184 if (dst->weights[page]) {
3606 927184 from = UCA900_WEIGHT_ADDR(dst->weights[page], /*level=*/0, code);
3607 927184 from_stride = UCA900_DISTANCE_BETWEEN_LEVELS;
3608 927184 ce_cnt = UCA900_NUM_OF_CE(dst->weights[page], code);
3609 }
3610 }
3611
3612 944298 for (int weight_ind = 0;
3613
3/4
✓ Branch 0 taken 3415926 times.
✓ Branch 1 taken 944298 times.
✓ Branch 2 taken 3415926 times.
✗ Branch 3 not taken.
4360224 weight_ind < ce_cnt * MY_UCA_900_CE_SIZE && count < to_length;
3614 weight_ind++) {
3615 3415926 *to = *from;
3616 3415926 to += to_stride;
3617 3415926 from += from_stride;
3618 3415926 count++;
3619 }
3620 944298 total_ce_cnt += ce_cnt;
3621 }
3622
3623 /*
3624 For shift on primary weight, there might not be enough room in the tables.
3625 For example, Sinhala has the rule "&\\u0DA5 < \\u0DA4", which means
3626 that we should move U+0DA4 after U+0DA5 (on the primary level).
3627 However, there is no room after U+0DA5 in DUCET unless we wanted to
3628 conflict with U+0DA6:
3629
3630 0DA4 ; [.28EC.0020.0002] # SINHALA LETTER TAALUJA NAASIKYAYA
3631 0DA5 ; [.28ED.0020.0002] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA
3632 0DA6 ; [.28EE.0020.0002] # SINHALA LETTER SANYAKA JAYANNA
3633
3634 Before our implementation of UCA 9.0.0, the shift on primary weight was
3635 done by making it a fake expansion when parsing the rule, where we'd expand
3636 U+0DA4 to U+0DA5 U+MMMM, MMMM being 'last_non_ignorable'. (This happens
3637 in my_coll_parser_scan_reset_sequence()). But from UCA 9.0.0, we also
3638 support accent- and case-sensitive collations, and then, having the extra
3639 weights of 'last_non_ignorable' (which is just a random character) on the
3640 second and third level may cause unexpected results for algorithms that
3641 use the meaning of the tertiary weight to infer case. Thus, we'll abandon
3642 the fake expansion way; instead, we add an extra CE (after the one
3643 from U+0DA5, the character we are moving after) to represent all the
3644 weights we might want to shift. The actual shifting happens in
3645 apply_shift_900().
3646
3647 For the rule "&\\u0DA5 < \\u0DA4", U+0DA4's weights become
3648 [.28ED.0020.0002][.54A4.0000.0000], where 0x54A4 is the value of
3649 extra_ce_pri_base. We then apply the differences from the rule
3650 (which are never negative) to the last CE, so that it becomes
3651 e.g. [.54A5.0000.0000].
3652 */
3653
7/8
✓ Branch 0 taken 563308 times.
✓ Branch 1 taken 106831 times.
✓ Branch 2 taken 533406 times.
✓ Branch 3 taken 29902 times.
✓ Branch 4 taken 262087 times.
✓ Branch 5 taken 271319 times.
✓ Branch 6 taken 398820 times.
✗ Branch 7 not taken.
670139 if ((rule->diff[0] || rule->diff[1] || rule->diff[2]) && count < to_length) {
3654
2/2
✓ Branch 0 taken 106831 times.
✓ Branch 1 taken 291989 times.
398820 *to = rule->diff[0] ? dst->extra_ce_pri_base : 0;
3655 398820 to += to_stride;
3656
2/2
✓ Branch 0 taken 42622 times.
✓ Branch 1 taken 356198 times.
398820 *to = rule->diff[1] ? dst->extra_ce_sec_base : 0;
3657 398820 to += to_stride;
3658
2/2
✓ Branch 0 taken 338021 times.
✓ Branch 1 taken 60799 times.
398820 *to = rule->diff[2] ? dst->extra_ce_ter_base : 0;
3659 398820 to += to_stride;
3660 398820 total_ce_cnt++;
3661 398820 count += 3;
3662 }
3663 670139 total_ce_cnt =
3664 670139 std::min(total_ce_cnt, (MY_UCA_MAX_WEIGHT_SIZE - 1) / MY_UCA_900_CE_SIZE);
3665 670139 *to_num_ce = total_ce_cnt;
3666
3667 670139 return total_ce_cnt;
3668 }
3669
3670 /**
3671 Helper function:
3672 Copies UCA weights for a given "uint" string
3673 to the given location.
3674
3675 @param dst destination UCA weight data
3676 @param to destination address
3677 @param to_stride number of uint16 elements between each successive weight in "to"
3678 @param to_length size of destination
3679 @param to_num_ce where to put the number of CEs generated
3680 @param rule The rule that contains the characters whose weights
3681 are to be copied
3682 @param base_len The length of base character list
3683 @param uca_ver UCA version
3684
3685 @return number of weights put
3686 */
3687
3688 1146168 static size_t my_char_weight_put(MY_UCA_INFO *dst, uint16 *to, size_t to_stride,
3689 size_t to_length, uint16 *to_num_ce,
3690 const MY_COLL_RULE *rule, size_t base_len,
3691 enum_uca_ver uca_ver) {
3692
2/2
✓ Branch 0 taken 670139 times.
✓ Branch 1 taken 476029 times.
1146168 if (uca_ver == UCA_V900)
3693 670139 return my_char_weight_put_900(dst, to, to_stride, to_length, to_num_ce,
3694 670139 rule, base_len);
3695
3696 476029 const my_wc_t *base = rule->base;
3697 476029 size_t count = 0;
3698
2/2
✓ Branch 0 taken 484571 times.
✓ Branch 1 taken 476029 times.
960600 while (base_len != 0) {
3699 484571 const uint16 *from = nullptr;
3700
3701
2/2
✓ Branch 0 taken 9858 times.
✓ Branch 1 taken 483367 times.
493225 for (size_t chlen = base_len; chlen > 1; chlen--) {
3702
2/2
✓ Branch 0 taken 1204 times.
✓ Branch 1 taken 8654 times.
9858 if ((from = my_uca_contraction_weight(dst->contraction_nodes, base,
3703 chlen))) {
3704 1204 base += chlen;
3705 1204 base_len -= chlen;
3706 1204 break;
3707 }
3708 }
3709
3710
2/2
✓ Branch 0 taken 483367 times.
✓ Branch 1 taken 1204 times.
484571 if (!from) {
3711 483367 from = my_char_weight_addr(dst, *base);
3712 483367 base++;
3713 483367 base_len--;
3714 }
3715
3716
4/6
✓ Branch 0 taken 966450 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 481879 times.
✓ Branch 3 taken 484571 times.
✓ Branch 4 taken 481879 times.
✗ Branch 5 not taken.
966450 for (; from && *from && count < to_length;) {
3717 481879 *to = *from++;
3718 481879 to += to_stride;
3719 481879 count++;
3720 }
3721 }
3722
3723 476029 *to = 0;
3724 476029 return count;
3725 }
3726
3727 /**
3728 Alloc new page and copy the default UCA weights
3729 @param cs Character set
3730 @param loader Character set loader
3731 @param src Default UCA data to copy from
3732 @param dst UCA data to copy weights to
3733 @param page page number
3734
3735 @retval false on success
3736 @retval true on error
3737 */
3738 73572 static bool my_uca_copy_page(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
3739 const MY_UCA_INFO *src, MY_UCA_INFO *dst,
3740 size_t page) {
3741 73572 const uint dst_size = 256 * dst->lengths[page] * sizeof(uint16);
3742
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 73572 times.
73572 if (!(dst->weights[page] = (uint16 *)(loader->once_alloc)(dst_size)))
3743 return true;
3744
3745
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 73572 times.
73572 assert(src->lengths[page] <= dst->lengths[page]);
3746 73572 memset(dst->weights[page], 0, dst_size);
3747
3/4
✓ Branch 0 taken 73572 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 45345 times.
✓ Branch 3 taken 28227 times.
73572 if (cs->uca && cs->uca->version == UCA_V900) {
3748 45345 const uint src_size = 256 * src->lengths[page] * sizeof(uint16);
3749 45345 memcpy(dst->weights[page], src->weights[page], src_size);
3750
2/2
✓ Branch 0 taken 27893 times.
✓ Branch 1 taken 334 times.
73572 } else if (src->lengths[page] > 0) {
3751
2/2
✓ Branch 0 taken 7140608 times.
✓ Branch 1 taken 27893 times.
7168501 for (uint chc = 0; chc < 256; chc++) {
3752 7140608 memcpy(dst->weights[page] + chc * dst->lengths[page],
3753 7140608 src->weights[page] + chc * src->lengths[page],
3754 7140608 src->lengths[page] * sizeof(uint16));
3755 }
3756 }
3757 73572 return false;
3758 }
3759
3760 /*
3761 This is used to apply the weight shift if there is a [before 1] rule.
3762 If we have a rule "&[before 1] A < B < C", and A's collation element is [P, S,
3763 T], then in my_char_weight_put_900(), we append one extra collation element to
3764 A's CE to be B and C's CE. So B and C's CE becomes [P, S, T][p, 0, 0]. What we
3765 do with this function is to change B's CE to [P - 1, S, T][p + n, 0, 0].
3766 1. The rule "&[before 1] A < B < C" means "B < C < A" on primary level. Since
3767 "B < A", so we give B the first primary weight as (P - 1).
3768 2. p is a weight value which is the maximum regular primary weight in DUCET
3769 plus one (0x54A3 + 1 = 0x54A4). This is to make sure B's primary weight
3770 is less than A and greater than any character which sorts before A.
3771 3. n is the number of characters in this rule's character list. For the B in
3772 this rule, n = 1. For the C in this rule, n = 2. This can make sure "B <
3773 C".
3774
3775 It is the same thing that apply_secondary_shift_900() and
3776 apply_tertiary_shift_900() do, but on different weight levels.
3777 */
3778 35547 static bool apply_primary_shift_900(MY_CHARSET_LOADER *loader,
3779 MY_COLL_RULES *rules, MY_COLL_RULE *r,
3780 uint16 *to, size_t to_stride,
3781 size_t nweights,
3782 uint16 *const last_weight_ptr) {
3783 /*
3784 Find the second-to-last non-ignorable primary weight to apply shift,
3785 because the last one is the extra CE we added in my_char_weight_put_900().
3786 */
3787 35547 int last_sec_pri = 0;
3788
1/2
✓ Branch 0 taken 35547 times.
✗ Branch 1 not taken.
35547 for (last_sec_pri = nweights - 2; last_sec_pri >= 0; --last_sec_pri) {
3789
1/2
✓ Branch 0 taken 35547 times.
✗ Branch 1 not taken.
35547 if (to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]) break;
3790 }
3791
1/2
✓ Branch 0 taken 35547 times.
✗ Branch 1 not taken.
35547 if (last_sec_pri >= 0) {
3792 35547 to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]--; /* Reset before */
3793
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 35547 times.
35547 if (rules->shift_after_method == my_shift_method_expand) {
3794 /*
3795 Special case. Don't let characters shifted after X
3796 and before next(X) intermix to each other.
3797
3798 For example:
3799 "[shift-after-method expand] &0 < a &[before primary]1 < A".
3800 I.e. we reorder 'a' after '0', and then 'A' before '1'.
3801 'a' must be sorted before 'A'.
3802
3803 Note, there are no real collations in CLDR which shift
3804 after and before two neighbouring characters. We need this
3805 just in case. Reserving 4096 (0x1000) weights for such
3806 cases is perfectly enough.
3807 */
3808 /* W3-TODO: const may vary on levels 2,3*/
3809 last_weight_ptr[0] += 0x1000;
3810 }
3811 } else {
3812 loader->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR;
3813 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3814 return true;
3815 }
3816 35547 return false;
3817 }
3818
3819 /*
3820 This is used to apply the weight shift if there is a [before 2] rule. Please
3821 see the comment on apply_primary_shift_900().
3822 */
3823 8176 static bool apply_secondary_shift_900(MY_CHARSET_LOADER *loader,
3824 MY_COLL_RULES *rules, MY_COLL_RULE *r,
3825 uint16 *to, size_t to_stride,
3826 size_t nweights,
3827 uint16 *const last_weight_ptr) {
3828 /*
3829 Find the second-to-last non-ignorable secondary weight to apply shift,
3830 because the last one is the extra CE we added in my_char_weight_put_900().
3831 */
3832 int last_sec_sec;
3833
1/2
✓ Branch 0 taken 8176 times.
✗ Branch 1 not taken.
8176 for (last_sec_sec = nweights - 2; last_sec_sec >= 0; --last_sec_sec) {
3834
1/2
✓ Branch 0 taken 8176 times.
✗ Branch 1 not taken.
8176 if (to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]) break;
3835 }
3836
1/2
✓ Branch 0 taken 8176 times.
✗ Branch 1 not taken.
8176 if (last_sec_sec >= 0) {
3837 // Reset before.
3838 8176 to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]--;
3839
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8176 times.
8176 if (rules->shift_after_method == my_shift_method_expand) {
3840 /*
3841 Same reason as in apply_primary_shift_900(), reserve 256 (0x100)
3842 weights for secondary level.
3843 */
3844 last_weight_ptr[to_stride] += 0x100;
3845 }
3846 } else {
3847 loader->errcode = EE_FAILED_TO_RESET_BEFORE_SECONDARY_IGNORABLE_CHAR;
3848 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3849 return true;
3850 }
3851 8176 return false;
3852 }
3853
3854 /*
3855 This is used to apply the weight shift if there is a [before 3] rule. Please
3856 see the comment on apply_primary_shift_900().
3857 */
3858 186102 static bool apply_tertiary_shift_900(MY_CHARSET_LOADER *loader,
3859 MY_COLL_RULES *rules, MY_COLL_RULE *r,
3860 uint16 *to, size_t to_stride,
3861 size_t nweights,
3862 uint16 *const last_weight_ptr) {
3863 /*
3864 Find the second-to-last non-ignorable tertiary weight to apply shift,
3865 because the last one is the extra CE we added in my_char_weight_put_900().
3866 */
3867 int last_sec_ter;
3868
1/2
✓ Branch 0 taken 186102 times.
✗ Branch 1 not taken.
186102 for (last_sec_ter = nweights - 2; last_sec_ter >= 0; --last_sec_ter) {
3869
1/2
✓ Branch 0 taken 186102 times.
✗ Branch 1 not taken.
186102 if (to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride])
3870 186102 break;
3871 }
3872
1/2
✓ Branch 0 taken 186102 times.
✗ Branch 1 not taken.
186102 if (last_sec_ter >= 0) {
3873 // Reset before.
3874 186102 to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride]--;
3875
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 186102 times.
186102 if (rules->shift_after_method == my_shift_method_expand) {
3876 /*
3877 Same reason as in apply_primary_shift_900(), reserve 16 (0x10)
3878 weights for tertiary level.
3879 */
3880 last_weight_ptr[to_stride * 2] += 0x10;
3881 }
3882 } else {
3883 loader->errcode = EE_FAILED_TO_RESET_BEFORE_TERTIARY_IGNORABLE_CHAR;
3884 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3885 return true;
3886 }
3887 186102 return false;
3888 }
3889
3890 670139 static bool apply_shift_900(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
3891 MY_COLL_RULE *r, uint16 *to, size_t to_stride,
3892 size_t nweights) {
3893 // nweights should not be less than 1 because of the extra CE.
3894
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 670139 times.
670139 assert(nweights);
3895 // Apply level difference.
3896 670139 uint16 *const last_weight_ptr =
3897 670139 to + (nweights - 1) * to_stride * MY_UCA_900_CE_SIZE;
3898 670139 last_weight_ptr[0] += r->diff[0];
3899 670139 last_weight_ptr[to_stride] += r->diff[1];
3900 670139 last_weight_ptr[to_stride * 2] += r->diff[2];
3901
2/2
✓ Branch 0 taken 35547 times.
✓ Branch 1 taken 634592 times.
670139 if (r->before_level == 1) // Apply "&[before primary]".
3902 35547 return apply_primary_shift_900(loader, rules, r, to, to_stride, nweights,
3903 35547 last_weight_ptr);
3904
2/2
✓ Branch 0 taken 8176 times.
✓ Branch 1 taken 626416 times.
634592 else if (r->before_level == 2) // Apply "[before 2]".
3905 8176 return apply_secondary_shift_900(loader, rules, r, to, to_stride, nweights,
3906 8176 last_weight_ptr);
3907
2/2
✓ Branch 0 taken 186102 times.
✓ Branch 1 taken 440314 times.
626416 else if (r->before_level == 3) // Apply "[before 3]".
3908 186102 return apply_tertiary_shift_900(loader, rules, r, to, to_stride, nweights,
3909 186102 last_weight_ptr);
3910 440314 return false;
3911 }
3912
3913 1146168 static bool apply_shift(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
3914 MY_COLL_RULE *r, int level, uint16 *to,
3915 size_t to_stride, size_t nweights) {
3916
2/2
✓ Branch 0 taken 670139 times.
✓ Branch 1 taken 476029 times.
1146168 if (rules->uca->version == UCA_V900)
3917 670139 return apply_shift_900(loader, rules, r, to, to_stride, nweights);
3918
3919
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 476029 times.
476029 assert(to_stride == 1);
3920
3921 /* Apply level difference. */
3922
2/2
✓ Branch 0 taken 473049 times.
✓ Branch 1 taken 2980 times.
476029 if (nweights) {
3923 473049 to[nweights - 1] += r->diff[0];
3924
2/2
✓ Branch 0 taken 186 times.
✓ Branch 1 taken 472863 times.
473049 if (r->before_level == 1) /* Apply "&[before primary]" */
3925 {
3926
2/2
✓ Branch 0 taken 184 times.
✓ Branch 1 taken 2 times.
186 if (nweights >= 2) {
3927 184 to[nweights - 2]--; /* Reset before */
3928
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 128 times.
184 if (rules->shift_after_method == my_shift_method_expand) {
3929 /*
3930 Special case. Don't let characters shifted after X
3931 and before next(X) intermix with each other.
3932
3933 For example:
3934 "[shift-after-method expand] &0 < a &[before primary]1 < A".
3935 I.e. we reorder 'a' after '0', and then 'A' before '1'.
3936 'a' must be sorted before 'A'.
3937
3938 Note, there are no real collations in CLDR which shift
3939 after and before two neighbour characters. We need this
3940 just in case. Reserving 4096 (0x1000) weights for such
3941 cases is perfectly enough.
3942 */
3943 /* W3-TODO: const may vary on levels 2,3*/
3944 56 to[nweights - 1] += 0x1000;
3945 }
3946 } else {
3947 2 loader->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR;
3948 2 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3949 2 return true;
3950 }
3951 }
3952 } else {
3953 /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */
3954
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2980 times.
2980 assert(to[0] == 0);
3955 2980 to[0] = r->diff[level];
3956 }
3957 476027 return false;
3958 }
3959
3960 243219 static MY_CONTRACTION *add_contraction_to_trie(
3961 std::vector<MY_CONTRACTION> *cont_nodes, MY_COLL_RULE *r) {
3962 243219 MY_CONTRACTION new_node{0, {}, {}, {}, false, 0};
3963
2/2
✓ Branch 0 taken 186122 times.
✓ Branch 1 taken 57097 times.
243219 if (r->with_context) // previous-context contraction
3964 {
3965
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 186122 times.
186122 assert(my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION) == 2);
3966 std::vector<MY_CONTRACTION>::iterator node_it =
3967
1/2
✓ Branch 0 taken 186122 times.
✗ Branch 1 not taken.
186122 find_contraction_part_in_trie(*cont_nodes, r->curr[1]);
3968
6/6
✓ Branch 0 taken 185240 times.
✓ Branch 1 taken 882 times.
✓ Branch 2 taken 592 times.
✓ Branch 3 taken 184648 times.
✓ Branch 4 taken 1474 times.
✓ Branch 5 taken 184648 times.
186122 if (node_it == cont_nodes->end() || node_it->ch != r->curr[1]) {
3969 1474 new_node.ch = r->curr[1];
3970
1/2
✓ Branch 0 taken 1474 times.
✗ Branch 1 not taken.
1474 node_it = cont_nodes->insert(node_it, new_node);
3971 }
3972 186122 cont_nodes = &node_it->child_nodes_context;
3973
3974
1/2
✓ Branch 0 taken 186122 times.
✗ Branch 1 not taken.
186122 node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[0]);
3975
6/6
✓ Branch 0 taken 159348 times.
✓ Branch 1 taken 26774 times.
✓ Branch 2 taken 157584 times.
✓ Branch 3 taken 1764 times.
✓ Branch 4 taken 184358 times.
✓ Branch 5 taken 1764 times.
186122 if (node_it == cont_nodes->end() || node_it->ch != r->curr[0]) {
3976 184358 new_node.ch = r->curr[0];
3977
1/2
✓ Branch 0 taken 184358 times.
✗ Branch 1 not taken.
184358 node_it = cont_nodes->insert(node_it, new_node);
3978 }
3979 186122 node_it->is_contraction_tail = true;
3980 186122 node_it->contraction_len = 2;
3981 186122 return &(*node_it);
3982 } else // normal contraction
3983 {
3984 57097 size_t contraction_len = my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
3985 57097 std::vector<MY_CONTRACTION>::iterator node_it;
3986
2/2
✓ Branch 0 taken 125606 times.
✓ Branch 1 taken 57097 times.
182703 for (size_t ch_ind = 0; ch_ind < contraction_len; ++ch_ind) {
3987
1/2
✓ Branch 0 taken 125606 times.
✗ Branch 1 not taken.
125606 node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[ch_ind]);
3988
6/6
✓ Branch 0 taken 71090 times.
✓ Branch 1 taken 54516 times.
✓ Branch 2 taken 38718 times.
✓ Branch 3 taken 32372 times.
✓ Branch 4 taken 93234 times.
✓ Branch 5 taken 32372 times.
125606 if (node_it == cont_nodes->end() || node_it->ch != r->curr[ch_ind]) {
3989 93234 new_node.ch = r->curr[ch_ind];
3990
1/2
✓ Branch 0 taken 93234 times.
✗ Branch 1 not taken.
93234 node_it = cont_nodes->insert(node_it, new_node);
3991 }
3992 125606 cont_nodes = &node_it->child_nodes;
3993 }
3994 57097 node_it->is_contraction_tail = true;
3995 57097 node_it->contraction_len = contraction_len;
3996 57097 return &(*node_it);
3997 }
3998 243219 }
3999
4000 1146168 static bool apply_one_rule(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4001 MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
4002 MY_UCA_INFO *dst) {
4003 size_t nweights;
4004 1146168 size_t nreset = my_coll_rule_reset_length(r); /* Length of reset sequence */
4005 1146168 size_t nshift = my_coll_rule_shift_length(r); /* Length of shift sequence */
4006 uint16 *to, *to_num_ce;
4007 size_t to_stride;
4008
4009
2/2
✓ Branch 0 taken 243219 times.
✓ Branch 1 taken 902949 times.
1146168 if (nshift >= 2) /* Contraction */
4010 {
4011 size_t i;
4012 int flag;
4013 /* Add HEAD, MID and TAIL flags for the contraction parts */
4014 243219 my_uca_add_contraction_flag(
4015 dst->contraction_flags, r->curr[0],
4016
2/2
✓ Branch 0 taken 186122 times.
✓ Branch 1 taken 57097 times.
243219 r->with_context ? MY_UCA_PREVIOUS_CONTEXT_HEAD : MY_UCA_CNT_HEAD);
4017
2/2
✓ Branch 0 taken 11412 times.
✓ Branch 1 taken 243219 times.
254631 for (i = 1, flag = MY_UCA_CNT_MID1; i < nshift - 1; i++, flag <<= 1)
4018 11412 my_uca_add_contraction_flag(dst->contraction_flags, r->curr[i], flag);
4019 243219 my_uca_add_contraction_flag(
4020 dst->contraction_flags, r->curr[i],
4021
2/2
✓ Branch 0 taken 186122 times.
✓ Branch 1 taken 57097 times.
243219 r->with_context ? MY_UCA_PREVIOUS_CONTEXT_TAIL : MY_UCA_CNT_TAIL);
4022 /* Add new contraction to the contraction list */
4023 MY_CONTRACTION *trie_node =
4024 243219 add_contraction_to_trie(dst->contraction_nodes, r);
4025 243219 to = trie_node->weight;
4026 243219 to_stride = 1;
4027 243219 to_num_ce = &to[MY_UCA_MAX_WEIGHT_SIZE - 1];
4028 /* Store weights of the "reset to" character */
4029 nweights =
4030 243219 my_char_weight_put(dst, to, to_stride, MY_UCA_MAX_WEIGHT_SIZE - 1,
4031 243219 to_num_ce, r, nreset, rules->uca->version);
4032 } else {
4033 902949 my_wc_t pagec = (r->curr[0] >> 8);
4034
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 902949 times.
902949 assert(dst->weights[pagec]);
4035
3/4
✓ Branch 0 taken 902949 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 450304 times.
✓ Branch 3 taken 452645 times.
902949 if (cs->uca && cs->uca->version == UCA_V900) {
4036 450304 to = my_char_weight_addr_900(dst, r->curr[0]);
4037 450304 to_stride = UCA900_DISTANCE_BETWEEN_LEVELS;
4038 450304 to_num_ce = to - UCA900_DISTANCE_BETWEEN_LEVELS;
4039 } else {
4040 452645 to = my_char_weight_addr(dst, r->curr[0]);
4041 452645 to_stride = 1;
4042 452645 to_num_ce = to + (dst->lengths[pagec] - 1);
4043 }
4044 /* Store weights of the "reset to" character */
4045
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 902949 times.
902949 if (dst->lengths[pagec] == 0)
4046 nweights = 0;
4047 else
4048 902949 nweights = my_char_weight_put(dst, to, to_stride, dst->lengths[pagec] - 1,
4049 902949 to_num_ce, r, nreset, rules->uca->version);
4050 }
4051
4052 1146168 change_weight_if_case_first(cs, dst, r, to, to_stride, nshift, nweights);
4053 /* Apply level difference. */
4054 1146168 return apply_shift(loader, rules, r, level, to, to_stride, nweights);
4055 }
4056
4057 /**
4058 Check if collation rules are valid,
4059 i.e. characters are not outside of the collation supported range.
4060 */
4061 26377 static int check_rules(MY_CHARSET_LOADER *loader, const MY_COLL_RULES *rules,
4062 const MY_UCA_INFO *dst, const MY_UCA_INFO *src) {
4063 const MY_COLL_RULE *r, *rlast;
4064
2/2
✓ Branch 0 taken 1146168 times.
✓ Branch 1 taken 26377 times.
1172545 for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
4065
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1146168 times.
1146168 if (r->curr[0] > dst->maxchar) {
4066 loader->errcode = EE_SHIFT_CHAR_OUT_OF_RANGE;
4067 snprintf(loader->errarg, sizeof(loader->errarg), "u%04X",
4068 (uint)r->curr[0]);
4069 return true;
4070
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1146168 times.
1146168 } else if (r->base[0] > src->maxchar) {
4071 loader->errcode = EE_RESET_CHAR_OUT_OF_RANGE;
4072 snprintf(loader->errarg, sizeof(loader->errarg), "u%04X",
4073 (uint)r->base[0]);
4074 return true;
4075 }
4076 }
4077 26377 return false;
4078 }
4079
4080 8536 static void synthesize_lengths_900(uchar *lengths, const uint16 *const *weights,
4081 uint npages) {
4082
2/2
✓ Branch 0 taken 37148672 times.
✓ Branch 1 taken 8536 times.
37157208 for (uint page = 0; page < npages; ++page) {
4083 37148672 int max_len = 0;
4084
2/2
✓ Branch 0 taken 1272402 times.
✓ Branch 1 taken 35876270 times.
37148672 if (weights[page]) {
4085
2/2
✓ Branch 0 taken 325734912 times.
✓ Branch 1 taken 1272402 times.
327007314 for (uint code = 0; code < 256; ++code) {
4086 325734912 max_len = std::max<int>(max_len, weights[page][code]);
4087 }
4088 }
4089
2/2
✓ Branch 0 taken 35876270 times.
✓ Branch 1 taken 1272402 times.
37148672 if (max_len == 0)
4090 35876270 lengths[page] = 0;
4091 else
4092 1272402 lengths[page] = max_len * MY_UCA_900_CE_SIZE + 1;
4093 }
4094 8536 }
4095
4096 26231 static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) {
4097
3/4
✓ Branch 0 taken 26231 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8390 times.
✓ Branch 3 taken 17841 times.
26231 if (!cs->uca || cs->uca->version != UCA_V900 ||
4098
2/2
✓ Branch 0 taken 8096 times.
✓ Branch 1 taken 294 times.
8390 cs->coll_param != &ja_coll_param)
4099 25937 return;
4100
2/2
✓ Branch 0 taken 24108 times.
✓ Branch 1 taken 294 times.
24402 for (int page = MIN_JA_HAN_PAGE; page <= MAX_JA_HAN_PAGE; page++) {
4101 // In DUCET, weight is not assigned to code points in [U+4E00, U+9FFF].
4102 // When re-initializing (after my_coll_uninit_uca), the weights
4103 // may already be set.
4104
3/4
✓ Branch 0 taken 246 times.
✓ Branch 1 taken 23862 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 246 times.
24108 assert(dst->weights[page] == nullptr ||
4105 dst->weights[page] == ja_han_pages[page - MIN_JA_HAN_PAGE]);
4106 24108 dst->weights[page] = ja_han_pages[page - MIN_JA_HAN_PAGE];
4107 }
4108 }
4109
4110 /*
4111 We have reordered all the characters in the pages which contain Chinese Han
4112 characters with uca9dump (see dump_zh_pages() in uca9-dump.cc). Replace the
4113 DUCET pages with these pages.
4114 */
4115 146 static void copy_zh_han_pages(MY_UCA_INFO *dst) {
4116
2/2
✓ Branch 0 taken 98258 times.
✓ Branch 1 taken 146 times.
98404 for (int page = MIN_ZH_HAN_PAGE; page <= MAX_ZH_HAN_PAGE; page++) {
4117
2/2
✓ Branch 0 taken 43070 times.
✓ Branch 1 taken 55188 times.
98258 if (zh_han_pages[page - MIN_ZH_HAN_PAGE]) {
4118 43070 dst->weights[page] = zh_han_pages[page - MIN_ZH_HAN_PAGE];
4119 }
4120 }
4121 146 }
4122
4123 /*
4124 UCA defines an algorithm to calculate character's implicit weight if this
4125 character's weight is not defined in the DUCET. This function is to help
4126 convert Chinese character's implicit weight calculated by UCA back to its code
4127 points.
4128 The implicit weight and the code point are not a 1 : 1 mapping because DUCET lets
4129 some characters share implicit primary weight. For example, the DUCET defines
4130 "2F00 ; [.FB40.0020.0004][.CE00.0000.0000] # KANGXI RADICAL ONE", and 4E00's
4131 implicit weight is [.FB40.0020.0002][.CE00.0000.0000]. We can see the primary
4132 weights of U+2F00 and U+4E00 are same (FB40 CE00).
4133
4134 But for the Han characters in zh.xml file, each one has unique implicit
4135 weight.
4136 */
4137 460522 static inline my_wc_t convert_implicit_to_ch(uint16 first, uint16 second) {
4138 /*
4139 For reference, here is how UCA calculates one character's implicit weight.
4140 AAAA = 0xFB40 + (CP >> 15) # The 0xFB40 changes for different character
4141 # groups
4142 BBBB = (CP & 0x7FFF) | 0x8000
4143 */
4144
2/2
✓ Branch 0 taken 185892 times.
✓ Branch 1 taken 274630 times.
460522 if (first < 0xFB80)
4145 185892 return (((first - 0xFB40) << 15) | (second & 0x7FFF));
4146
2/2
✓ Branch 0 taken 26390 times.
✓ Branch 1 taken 248240 times.
274630 else if (first < 0xFBC0)
4147 26390 return (((first - 0xFB80) << 15) | (second & 0x7FFF));
4148 else
4149 248240 return (((first - 0xFBC0) << 15) | (second & 0x7FFF));
4150 }
4151
4152 /*
4153 Usually we do reordering in apply_reorder_param(). But for the Chinese
4154 collation, since we want to remove the weight gap between the character groups
4155 (see the comment on change_zh_implicit()), and we have done the reordering for
4156 some characters in the pages which contain Chinese Han characters, if we
4157 still use apply_reorder_param() to do the reordering for other characters, we
4158 might meet weight conflict. For example, in the DUCET page, 'A' has primary
4159 weight 0x1C47, but this value has been assigned to the first Chinese Han
4160 character in CLDR zh.xml file.
4161 So we do the reordering for all the DUCET pages when initializing the
4162 collation.
4163 */
4164 146 static void modify_all_zh_pages(Reorder_param *reorder_param, MY_UCA_INFO *dst,
4165 int npages) {
4166 146 std::map<int, int> zh_han_to_single_weight_map;
4167
2/2
✓ Branch 0 taken 6035056 times.
✓ Branch 1 taken 146 times.
6035202 for (int i = 0; i < ZH_HAN_WEIGHT_PAIRS; i++) {
4168 6035056 zh_han_to_single_weight_map[zh_han_to_single_weight[i * 2]] =
4169
1/2
✓ Branch 0 taken 6035056 times.
✗ Branch 1 not taken.
6035056 zh_han_to_single_weight[i * 2 + 1];
4170 }
4171
4172
2/2
✓ Branch 0 taken 635392 times.
✓ Branch 1 taken 146 times.
635538 for (int page = 0; page < npages; page++) {
4173 /*
4174 If there is no page in the DUCET, then all the characters in this page
4175 must have implicit weight. The reordering for it will be done by
4176 change_zh_implicit(). Do not need to change here.
4177 If there is a page in zh_han_pages[], then all the characters in this page
4178 have been reordered by uca9dump. Do not need to change here.
4179 */
4180
2/2
✓ Branch 0 taken 22046 times.
✓ Branch 1 taken 613346 times.
635392 if (!dst->weights[page] ||
4181
4/4
✓ Branch 0 taken 15330 times.
✓ Branch 1 taken 6716 times.
✓ Branch 2 taken 14600 times.
✓ Branch 3 taken 730 times.
22046 (page >= MIN_ZH_HAN_PAGE && page <= MAX_ZH_HAN_PAGE &&
4182
2/2
✓ Branch 0 taken 730 times.
✓ Branch 1 taken 13870 times.
14600 zh_han_pages[page - MIN_ZH_HAN_PAGE]))
4183 614076 continue;
4184
2/2
✓ Branch 0 taken 5456896 times.
✓ Branch 1 taken 21316 times.
5478212 for (int off = 0; off < 256; off++) {
4185 5456896 uint16 *wbeg = UCA900_WEIGHT_ADDR(dst->weights[page], 0, off);
4186 5456896 int num_of_ce = UCA900_NUM_OF_CE(dst->weights[page], off);
4187
2/2
✓ Branch 0 taken 6017378 times.
✓ Branch 1 taken 5456896 times.
11474274 for (int ce = 0; ce < num_of_ce; ce++) {
4188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6017378 times.
6017378 assert(reorder_param->wt_rec_num == 1);
4189
2/2
✓ Branch 0 taken 4349147 times.
✓ Branch 1 taken 1668231 times.
6017378 if (*wbeg >= reorder_param->wt_rec[0].old_wt_bdy.begin &&
4190
2/2
✓ Branch 0 taken 2955082 times.
✓ Branch 1 taken 1394065 times.
4349147 *wbeg <= reorder_param->wt_rec[0].old_wt_bdy.end) {
4191 2955082 *wbeg = *wbeg + reorder_param->wt_rec[0].new_wt_bdy.begin -
4192 2955082 reorder_param->wt_rec[0].old_wt_bdy.begin;
4193
2/2
✓ Branch 0 taken 1356905 times.
✓ Branch 1 taken 1705391 times.
3062296 } else if (*wbeg >= 0xFB00) {
4194 1356905 uint16 next_wt = *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS);
4195
4/4
✓ Branch 0 taken 1356885 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 460522 times.
✓ Branch 3 taken 896363 times.
1356905 if (*wbeg >= 0xFB40 && *wbeg <= 0xFBC1) { // Han's implicit weight
4196 /*
4197 If some characters in DUCET share the same implicit weight, their
4198 reordered weight should be same too.
4199 */
4200 460522 my_wc_t ch = convert_implicit_to_ch(*wbeg, next_wt);
4201
1/2
✓ Branch 0 taken 460522 times.
✗ Branch 1 not taken.
460522 if (zh_han_to_single_weight_map.find(ch) !=
4202
2/2
✓ Branch 0 taken 199231 times.
✓ Branch 1 taken 261291 times.
921044 zh_han_to_single_weight_map.end()) {
4203
1/2
✓ Branch 0 taken 199231 times.
✗ Branch 1 not taken.
199231 *wbeg = zh_han_to_single_weight_map[ch];
4204 199231 *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS) = 0;
4205 199231 wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
4206 199231 ce++;
4207 199231 continue;
4208 }
4209 }
4210 1157674 *wbeg = change_zh_implicit(*wbeg);
4211 1157674 wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
4212 1157674 ce++;
4213 }
4214 5818147 wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
4215 }
4216 }
4217 }
4218 146 }
4219
4220 26377 static bool init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4221 MY_COLL_RULES *rules, int level, MY_UCA_INFO *dst,
4222 const MY_UCA_INFO *src,
4223 bool lengths_are_temporary) {
4224 MY_COLL_RULE *r, *rlast;
4225 26377 size_t i, npages = (src->maxchar + 1) / 256;
4226 26377 bool has_contractions = false;
4227
4228 26377 dst->maxchar = src->maxchar;
4229
4230
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26377 times.
26377 if (check_rules(loader, rules, dst, src)) return true;
4231
4232 /* Allocate memory for pages and their lengths */
4233
2/2
✓ Branch 0 taken 8536 times.
✓ Branch 1 taken 17841 times.
26377 if (lengths_are_temporary) {
4234
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8536 times.
8536 if (!(dst->lengths = (uchar *)(loader->mem_malloc)(npages))) return true;
4235 8536 if (!(dst->weights =
4236
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8536 times.
8536 (uint16 **)(loader->once_alloc)(npages * sizeof(uint16 *)))) {
4237 (loader->mem_free)(dst->lengths);
4238 return true;
4239 }
4240 } else {
4241
2/4
✓ Branch 0 taken 17841 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 17841 times.
35682 if (!(dst->lengths = (uchar *)(loader->once_alloc)(npages)) ||
4242 17841 !(dst->weights =
4243
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17841 times.
17841 (uint16 **)(loader->once_alloc)(npages * sizeof(uint16 *))))
4244 return true;
4245 }
4246
4247 /*
4248 Copy pages lengths and page pointers from the default UCA weights.
4249 */
4250 26377 memcpy(dst->lengths, src->lengths, npages);
4251 26377 memcpy(dst->weights, src->weights, npages * sizeof(uint16 *));
4252
4253 /*
4254 Calculate maximum lengths for the pages which will be overwritten.
4255 Mark pages that will be overwritten as NULL.
4256 We'll allocate their own memory.
4257 */
4258
2/2
✓ Branch 0 taken 1146168 times.
✓ Branch 1 taken 26377 times.
1172545 for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
4259
2/2
✓ Branch 0 taken 902949 times.
✓ Branch 1 taken 243219 times.
1146168 if (!r->curr[1]) /* If not a contraction */
4260 {
4261 902949 uint pagec = (r->curr[0] >> 8);
4262
2/2
✓ Branch 0 taken 200090 times.
✓ Branch 1 taken 702859 times.
902949 if (r->base[1]) /* Expansion */
4263 {
4264 /* Reserve space for maximum possible length */
4265 200090 dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE;
4266 } else {
4267 702859 uint pageb = (r->base[0] >> 8);
4268
6/6
✓ Branch 0 taken 229000 times.
✓ Branch 1 taken 473859 times.
✓ Branch 2 taken 148463 times.
✓ Branch 3 taken 80537 times.
✓ Branch 4 taken 48631 times.
✓ Branch 5 taken 99832 times.
702859 if ((r->diff[0] || r->diff[1] || r->diff[2]) &&
4269
2/2
✓ Branch 0 taken 33833 times.
✓ Branch 1 taken 569194 times.
603027 dst->lengths[pagec] < (src->lengths[pageb] + 3)) {
4270
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 33833 times.
33833 if ((src->lengths[pageb] + 3) > MY_UCA_MAX_WEIGHT_SIZE)
4271 dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE;
4272 else
4273 33833 dst->lengths[pagec] = src->lengths[pageb] + 3;
4274
2/2
✓ Branch 0 taken 590 times.
✓ Branch 1 taken 668436 times.
669026 } else if (dst->lengths[pagec] < src->lengths[pageb])
4275 590 dst->lengths[pagec] = src->lengths[pageb];
4276 }
4277 902949 dst->weights[pagec] = nullptr; /* Mark that we'll overwrite this page */
4278 } else
4279 243219 has_contractions = true;
4280 }
4281
4282
2/2
✓ Branch 0 taken 8042 times.
✓ Branch 1 taken 18335 times.
26377 if (has_contractions) {
4283 8042 dst->have_contractions = true;
4284
2/4
✓ Branch 0 taken 8042 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8042 times.
✗ Branch 3 not taken.
8042 dst->contraction_nodes = new std::vector<MY_CONTRACTION>(0);
4285 8042 if (!(dst->contraction_flags =
4286
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8042 times.
8042 (char *)(loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
4287 return true;
4288 8042 memset(dst->contraction_flags, 0, MY_UCA_CNT_FLAG_SIZE);
4289 }
4290
2/2
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 26231 times.
26377 if (cs->coll_param == &zh_coll_param) {
4291 /*
4292 We are going to reorder the weight of characters in uca pages when
4293 initializing this collation. And because of the reorder rule [reorder
4294 Hani], we need to change almost every character's weight. So copy all
4295 the pages.
4296 Please also see the comment on modify_all_zh_pages().
4297 */
4298 bool rc;
4299
2/2
✓ Branch 0 taken 635392 times.
✓ Branch 1 taken 146 times.
635538 for (i = 0; i < npages; i++) {
4300
4/6
✓ Branch 0 taken 22046 times.
✓ Branch 1 taken 613346 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 22046 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 635392 times.
635392 if (dst->lengths[i] && (rc = my_uca_copy_page(cs, loader, src, dst, i)))
4301 return rc;
4302 }
4303 146 modify_all_zh_pages(cs->coll_param->reorder_param, dst, npages);
4304 146 copy_zh_han_pages(dst);
4305 } else {
4306 /* Allocate pages that we'll overwrite and copy default weights */
4307
2/2
✓ Branch 0 taken 44693248 times.
✓ Branch 1 taken 26231 times.
44719479 for (i = 0; i < npages; i++) {
4308 bool rc;
4309 /*
4310 Don't touch pages with lengths[i]==0, they have implicit weights
4311 calculated algorithmically.
4312 */
4313
5/6
✓ Branch 0 taken 42395601 times.
✓ Branch 1 taken 2297647 times.
✓ Branch 2 taken 51526 times.
✓ Branch 3 taken 42344075 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 44693248 times.
44744774 if (!dst->weights[i] && dst->lengths[i] &&
4314
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 51526 times.
51526 (rc = my_uca_copy_page(cs, loader, src, dst, i)))
4315 return rc;
4316 }
4317
4318 26231 copy_ja_han_pages(cs, dst);
4319 }
4320
4321 /*
4322 Preparatory step is done at this point.
4323 Now we have memory allocated for the pages that we'll overwrite,
4324 and for contractions, including previous context contractions.
4325 Also, for the pages that we'll overwrite, we have copied default weights.
4326 Now iterate through the rules, overwrite weights for the characters
4327 that appear in the rules, and put all contractions into contraction list.
4328 */
4329
2/2
✓ Branch 0 taken 1146168 times.
✓ Branch 1 taken 26375 times.
1172543 for (r = rules->rule; r < rlast; r++) {
4330
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1146166 times.
1146168 if (apply_one_rule(cs, loader, rules, r, level, dst)) return true;
4331 }
4332 26375 return false;
4333 }
4334
4335 /**
4336 Check whether the composition character is already in rule list
4337 @param rules The rule list
4338 @param wc The composition character
4339 @return true The composition character is already in list
4340 false The composition character is not in list
4341 */
4342 531870 static bool my_comp_in_rulelist(const MY_COLL_RULES *rules, my_wc_t wc) {
4343 MY_COLL_RULE *r, *rlast;
4344
2/2
✓ Branch 0 taken 92625429 times.
✓ Branch 1 taken 431116 times.
93056545 for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
4345
4/4
✓ Branch 0 taken 126626 times.
✓ Branch 1 taken 92498803 times.
✓ Branch 2 taken 100754 times.
✓ Branch 3 taken 25872 times.
92625429 if (r->curr[0] == wc && r->curr[1] == 0) return true;
4346 }
4347 431116 return false;
4348 }
4349
4350 /**
4351 Check whether a composition character in the decomposition list is a
4352 normal character.
4353 @param dec_ind The index of composition character in list
4354 @return Whether it is a normal character
4355 */
4356 1606016628 static inline bool my_compchar_is_normal_char(uint dec_ind) {
4357 1606016628 return uni_dec[dec_ind].decomp_tag == DECOMP_TAG_NONE;
4358 }
4359
4360 835412 static inline bool my_compchar_is_normal_char(const Unidata_decomp *decomp) {
4361 835412 return my_compchar_is_normal_char(decomp - std::begin(uni_dec));
4362 }
4363
4364 280528 static Unidata_decomp *get_decomposition(my_wc_t ch) {
4365 3539889 auto comp_func = [](Unidata_decomp x, Unidata_decomp y) {
4366 3539889 return x.charcode < y.charcode;
4367 };
4368 280528 Unidata_decomp to_find = {ch, CHAR_CATEGORY_LU, DECOMP_TAG_NONE, {0}};
4369
1/2
✓ Branch 0 taken 280528 times.
✗ Branch 1 not taken.
280528 Unidata_decomp *decomp = std::lower_bound(
4370 std::begin(uni_dec), std::end(uni_dec), to_find, comp_func);
4371
5/6
✓ Branch 0 taken 280528 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 44996 times.
✓ Branch 3 taken 235532 times.
✓ Branch 4 taken 44996 times.
✓ Branch 5 taken 235532 times.
280528 if (decomp == std::end(uni_dec) || decomp->charcode != ch) return nullptr;
4372 235532 return decomp;
4373 }
4374
4375 3327294 static Combining_mark *my_find_combining_mark(my_wc_t code) {
4376 32563007 auto comp_func = [](Combining_mark x, Combining_mark y) {
4377 32563007 return x.charcode < y.charcode;
4378 };
4379 3327294 Combining_mark to_find = {code, 0};
4380
1/2
✓ Branch 0 taken 3327294 times.
✗ Branch 1 not taken.
3327294 return std::lower_bound(std::begin(combining_marks),
4381 6654588 std::end(combining_marks), to_find, comp_func);
4382 }
4383
4384 /**
4385 Check if a list of combining marks contains the whole list of origin
4386 decomposed combining marks.
4387 @param origin_dec The origin list of combining marks decomposed from
4388 character in tailoring rule.
4389 @param dec_codes The list of combining marks decomposed from
4390 character in decomposition list.
4391 @param dec_diff The combining marks exist in dec_codes but not in
4392 origin_dec.
4393 @return Whether the list of combining marks contains the
4394 whole list of origin combining marks.
4395 */
4396 259235227 static bool my_is_inheritance_of_origin(const my_wc_t *origin_dec,
4397 const my_wc_t *dec_codes,
4398 my_wc_t *dec_diff) {
4399 int ind0, ind1, ind2;
4400
2/2
✓ Branch 0 taken 257573025 times.
✓ Branch 1 taken 1662202 times.
259235227 if (origin_dec[0] != dec_codes[0]) return false;
4401
1/2
✓ Branch 0 taken 2473786 times.
✗ Branch 1 not taken.
4135988 for (ind0 = ind1 = ind2 = 1; ind0 < MY_UCA_MAX_CONTRACTION &&
4402 2473786 ind1 < MY_UCA_MAX_CONTRACTION &&
4403
5/6
✓ Branch 0 taken 2473786 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2242676 times.
✓ Branch 3 taken 231110 times.
✓ Branch 4 taken 1830787 times.
✓ Branch 5 taken 411889 times.
4947572 origin_dec[ind0] && dec_codes[ind1];) {
4404
2/2
✓ Branch 0 taken 167140 times.
✓ Branch 1 taken 1663647 times.
1830787 if (origin_dec[ind0] == dec_codes[ind1]) {
4405 167140 ind0++;
4406 167140 ind1++;
4407 } else {
4408 1663647 Combining_mark *mark0 = my_find_combining_mark(origin_dec[ind0]);
4409 1663647 Combining_mark *mark1 = my_find_combining_mark(dec_codes[ind1]);
4410
2/2
✓ Branch 0 taken 1019203 times.
✓ Branch 1 taken 644444 times.
1663647 if (mark0->ccc == mark1->ccc) return false;
4411 644444 dec_diff[ind2++] = dec_codes[ind1++];
4412 }
4413 }
4414
3/4
✓ Branch 0 taken 642999 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 231110 times.
✓ Branch 3 taken 411889 times.
642999 if (ind0 >= MY_UCA_MAX_CONTRACTION || !origin_dec[ind0]) {
4415
2/2
✓ Branch 0 taken 981776 times.
✓ Branch 1 taken 231110 times.
1212886 while (ind1 < MY_UCA_MAX_CONTRACTION) {
4416 981776 dec_diff[ind2++] = dec_codes[ind1++];
4417 }
4418 231110 return true;
4419 }
4420 411889 return false;
4421 }
4422
4423 /**
4424 Add new rules recursively if one rule's characters are in decomposition
4425 list.
4426 @param rules The rule list
4427 @param r The rule to check
4428 @param decomp_rec The decomposition of the character in rule.
4429 @param comp_added Bitset which marks whether the comp
4430 character has been added to rule list.
4431 @return 1 Error adding new rules
4432 0 Add rules successfully
4433 */
4434 280528 static int my_coll_add_inherit_rules(
4435 MY_COLL_RULES *rules, MY_COLL_RULE *r, const Unidata_decomp *decomp_rec,
4436 std::bitset<array_elements(uni_dec)> *comp_added) {
4437
2/2
✓ Branch 0 taken 1605181216 times.
✓ Branch 1 taken 280528 times.
1605461744 for (uint dec_ind = 0; dec_ind < array_elements(uni_dec); dec_ind++) {
4438 /*
4439 For normal character which can be decomposed, it is always decomposed to
4440 be another character and one combining mark.
4441
4442 Currently we only support the weight inheritance of character that can be
4443 canonical-decomposed to another character and a list of combining marks.
4444 So skip the compatibility decomposition.
4445
4446 Sample from UnicodeData.txt:
4447 Canonical decomposition: U+00DC : U+0055 U+0308
4448 Compatibility decomposition: U+FF59 : <wide> U+0079
4449 */
4450
9/10
✓ Branch 0 taken 577887680 times.
✓ Branch 1 taken 1027293536 times.
✓ Branch 2 taken 577887680 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 569131501 times.
✓ Branch 5 taken 8756179 times.
✓ Branch 6 taken 477018736 times.
✓ Branch 7 taken 92112765 times.
✓ Branch 8 taken 1345945989 times.
✓ Branch 9 taken 259235227 times.
2082199952 if (!my_compchar_is_normal_char(dec_ind) || comp_added->test(dec_ind) ||
4451 477018736 (decomp_rec != nullptr &&
4452
2/2
✓ Branch 0 taken 309896274 times.
✓ Branch 1 taken 167122462 times.
477018736 uni_dec[dec_ind].decomp_tag != decomp_rec->decomp_tag))
4453 1345945989 continue;
4454 /*
4455 In DUCET, all accented character's weight is defined as base
4456 character's weight followed by accent mark's weight. For example:
4457 00DC = 0055 + 0308
4458 0055 ; [.1E30.0020.0008] # LATIN CAPITAL LETTER U
4459 0308 ; [.0000.002B.0002] # COMBINING DIAERESIS
4460 00DC ; [.1E30.0020.0008][.0000.002B.0002] # LATIN CAPITAL LETTER U
4461 WITH DIAERESIS
4462 So the composition character's rule should be the same as the original rule
4463 except for the change of the curr value.
4464 */
4465 259235227 my_wc_t dec_diff[MY_UCA_MAX_CONTRACTION]{r->curr[0], 0};
4466 259235227 my_wc_t orig_dec[MY_UCA_MAX_CONTRACTION]{0};
4467
2/2
✓ Branch 0 taken 92112765 times.
✓ Branch 1 taken 167122462 times.
259235227 if (decomp_rec == nullptr) {
4468 /*
4469 If there is no decomposition record found in Unidata_decomp, it means
4470 its decomposition form is itself.
4471 */
4472 92112765 orig_dec[0] = r->curr[0];
4473 } else {
4474 167122462 memcpy(orig_dec, decomp_rec->dec_codes, sizeof(orig_dec));
4475 }
4476
1/2
✓ Branch 0 taken 259235227 times.
✗ Branch 1 not taken.
259235227 if (my_is_inheritance_of_origin(orig_dec, uni_dec[dec_ind].dec_codes,
4477
4/4
✓ Branch 0 taken 231110 times.
✓ Branch 1 taken 259004117 times.
✓ Branch 2 taken 141014 times.
✓ Branch 3 taken 259094213 times.
259466337 dec_diff) &&
4478
2/2
✓ Branch 0 taken 141014 times.
✓ Branch 1 taken 90096 times.
231110 !my_comp_in_rulelist(rules, uni_dec[dec_ind].charcode)) {
4479 141014 MY_COLL_RULE newrule{{0}, {uni_dec[dec_ind].charcode, 0}, {0}, 0, false};
4480 141014 memcpy(newrule.base, dec_diff, sizeof(newrule.base));
4481
2/4
✓ Branch 0 taken 141014 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 141014 times.
141014 if (my_coll_rules_add(rules, &newrule)) return 1;
4482
1/2
✓ Branch 0 taken 141014 times.
✗ Branch 1 not taken.
141014 comp_added->set(dec_ind);
4483 }
4484 }
4485 280528 return 0;
4486 }
4487
4488 290102 static bool combining_mark_in_rulelist(const my_wc_t *dec_codes,
4489 const MY_COLL_RULE *r_start,
4490 const MY_COLL_RULE *r_end) {
4491
1/2
✓ Branch 0 taken 430992 times.
✗ Branch 1 not taken.
430992 for (int i = 1; i < MY_UCA_MAX_CONTRACTION; ++i) {
4492
2/2
✓ Branch 0 taken 258566 times.
✓ Branch 1 taken 172426 times.
430992 if (!*(dec_codes + i)) return false;
4493
2/2
✓ Branch 0 taken 11224334 times.
✓ Branch 1 taken 140890 times.
11365224 for (const MY_COLL_RULE *r = r_start; r < r_end; ++r) {
4494
2/2
✓ Branch 0 taken 31536 times.
✓ Branch 1 taken 11192798 times.
11224334 if (r->curr[0] == *(dec_codes + i)) {
4495 31536 return true;
4496 }
4497 }
4498 }
4499 return false;
4500 }
4501
4502 2752 static int add_normalization_rules(const CHARSET_INFO *cs,
4503 MY_COLL_RULES *rules) {
4504
3/4
✓ Branch 0 taken 2752 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2606 times.
✓ Branch 3 taken 146 times.
2752 if (!cs->coll_param || !cs->coll_param->norm_enabled) return 0;
4505 146 const int orig_rule_num = rules->nrules;
4506
2/2
✓ Branch 0 taken 835412 times.
✓ Branch 1 taken 146 times.
835558 for (Unidata_decomp *decomp = std::begin(uni_dec); decomp < std::end(uni_dec);
4507 ++decomp) {
4508
2/2
✓ Branch 0 taken 290102 times.
✓ Branch 1 taken 10658 times.
1136172 if (!my_compchar_is_normal_char(decomp) ||
4509
4/4
✓ Branch 0 taken 300760 times.
✓ Branch 1 taken 534652 times.
✓ Branch 2 taken 803876 times.
✓ Branch 3 taken 31536 times.
1136172 my_comp_in_rulelist(rules, decomp->charcode) ||
4510
2/2
✓ Branch 0 taken 258566 times.
✓ Branch 1 taken 31536 times.
290102 !combining_mark_in_rulelist(decomp->dec_codes, rules->rule,
4511 290102 rules->rule + orig_rule_num))
4512 803876 continue;
4513 31536 MY_COLL_RULE newrule{{0}, {decomp->charcode, 0}, {0}, 0, false};
4514 31536 memcpy(newrule.base, decomp->dec_codes, sizeof(newrule.base));
4515
2/4
✓ Branch 0 taken 31536 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 31536 times.
31536 if (my_coll_rules_add(rules, &newrule)) return 1;
4516 }
4517 146 return 0;
4518 }
4519
4520 /**
4521 For every rule in rule list, check and add new rules if it is in
4522 decomposition list.
4523 @param cs Character set info
4524 @param rules The rule list
4525 @return 1 Error happens when adding new rule
4526 0 Add rules successfully
4527 */
4528 26377 static int my_coll_check_rule_and_inherit(const CHARSET_INFO *cs,
4529 MY_COLL_RULES *rules) {
4530
2/2
✓ Branch 0 taken 17841 times.
✓ Branch 1 taken 8536 times.
26377 if (rules->uca->version != UCA_V900) return 0;
4531
4532 /*
4533 Character can combine with marks to be a new character. For example,
4534 A + [mark b] = A1, A1 + [mark c] = A2. We think the weight of A1 and
4535 A2 should shift with A if A is in rule list and its weight shifts,
4536 unless A1 / A2 is already in rule list.
4537 */
4538 8536 std::bitset<array_elements(uni_dec)> comp_added;
4539 8536 int orig_rule_num = rules->nrules;
4540
2/2
✓ Branch 0 taken 497589 times.
✓ Branch 1 taken 8536 times.
506125 for (int i = 0; i < orig_rule_num; ++i) {
4541 497589 MY_COLL_RULE r = *(rules->rule + i);
4542 /*
4543 Do not add inheritance rule for contraction.
4544 But for the Chinese collation, the weight shift rule of Chinese collation
4545 is a bit different from all the languages we added so far. For example, it
4546 has a rule "&e << ... << e\\u0302\\u0300". So far, if a language's rule
4547 involves 'e\\u0302\\u0300', it will use the combining form character,
4548 U+1EC1, and it is not a contraction. If we don't handle this for Chinese
4549 collation, it will skip some further rule inheriting.
4550 */
4551
4/4
✓ Branch 0 taken 420793 times.
✓ Branch 1 taken 76796 times.
✓ Branch 2 taken 217061 times.
✓ Branch 3 taken 203732 times.
497589 if (cs->coll_param != &zh_coll_param && r.curr[1]) continue;
4552
1/2
✓ Branch 0 taken 280528 times.
✗ Branch 1 not taken.
280528 Unidata_decomp *decomp_rec = get_decomposition(r.curr[0]);
4553
2/4
✓ Branch 0 taken 280528 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 280528 times.
280528 if (my_coll_add_inherit_rules(rules, &r, decomp_rec, &comp_added)) return 1;
4554 }
4555 8536 return 0;
4556 }
4557
4558 /**
4559 Helper function to store weight boundary values.
4560 @param[out] wt_rec Weight boundary for each character group and gap
4561 between groups
4562 @param rec_ind The position from where to store weight boundary
4563 @param old_begin Beginning weight of character group before reorder
4564 @param old_end End weight of character group before reorder
4565 @param new_begin Beginning weight of character group after reorder
4566 @param new_end End weight of character group after reorder
4567 */
4568 5213 static inline void my_set_weight_rec(
4569 Reorder_wt_rec (&wt_rec)[2 * UCA_MAX_CHAR_GRP], int rec_ind,
4570 uint16 old_begin, uint16 old_end, uint16 new_begin, uint16 new_end) {
4571 5213 wt_rec[rec_ind] = {{old_begin, old_end}, {new_begin, new_end}};
4572 5213 }
4573
4574 /**
4575 Calculate the reorder parameters for the character groups.
4576 @param cs Character set info
4577 @param[out] rec_ind The position from where to store weight boundary
4578 */
4579 2027 static void my_calc_char_grp_param(const CHARSET_INFO *cs, int &rec_ind) {
4580 2027 int weight_start = START_WEIGHT_TO_REORDER;
4581 2027 int grp_ind = 0;
4582 2027 Reorder_param *param = cs->coll_param->reorder_param;
4583
1/2
✓ Branch 0 taken 5213 times.
✗ Branch 1 not taken.
5213 for (; grp_ind < UCA_MAX_CHAR_GRP; ++grp_ind) {
4584
2/2
✓ Branch 0 taken 2027 times.
✓ Branch 1 taken 3186 times.
5213 if (param->reorder_grp[grp_ind] == CHARGRP_NONE) break;
4585 8987 for (Char_grp_info *info = std::begin(char_grp_infos);
4586
1/2
✓ Branch 0 taken 8987 times.
✗ Branch 1 not taken.
8987 info < std::end(char_grp_infos); ++info) {
4587
2/2
✓ Branch 0 taken 5801 times.
✓ Branch 1 taken 3186 times.
8987 if (param->reorder_grp[grp_ind] != info->group) continue;
4588 3186 my_set_weight_rec(
4589 3186 param->wt_rec, grp_ind, info->grp_wt_bdy.begin, info->grp_wt_bdy.end,
4590 weight_start,
4591 3186 weight_start + info->grp_wt_bdy.end - info->grp_wt_bdy.begin);
4592 3186 weight_start = param->wt_rec[grp_ind].new_wt_bdy.end + 1;
4593 3186 break;
4594 }
4595 }
4596 2027 rec_ind = grp_ind;
4597 2027 }
4598
4599 /**
4600 Calculate the reorder parameters for the gap between character groups.
4601 @param cs Character set info
4602 @param rec_ind The position from where to store weight boundary
4603 */
4604 2027 static void my_calc_char_grp_gap_param(CHARSET_INFO *cs, int &rec_ind) {
4605 2027 Reorder_param *param = cs->coll_param->reorder_param;
4606 2027 uint16 weight_start = param->wt_rec[rec_ind - 1].new_wt_bdy.end + 1;
4607 2027 Char_grp_info *last_grp = nullptr;
4608 12162 for (Char_grp_info *info = std::begin(char_grp_infos);
4609
2/2
✓ Branch 0 taken 10135 times.
✓ Branch 1 taken 2027 times.
12162 info < std::end(char_grp_infos); ++info) {
4610
1/2
✓ Branch 0 taken 21720 times.
✗ Branch 1 not taken.
21720 for (int ind = 0; ind < UCA_MAX_CHAR_GRP; ++ind) {
4611
2/2
✓ Branch 0 taken 6949 times.
✓ Branch 1 taken 14771 times.
21720 if (param->reorder_grp[ind] == CHARGRP_NONE) break;
4612
2/2
✓ Branch 0 taken 11585 times.
✓ Branch 1 taken 3186 times.
14771 if (param->reorder_grp[ind] != info->group) continue;
4613
2/2
✓ Branch 0 taken 727 times.
✓ Branch 1 taken 2459 times.
3186 if (param->max_weight < info->grp_wt_bdy.end)
4614 727 param->max_weight = info->grp_wt_bdy.end;
4615 /*
4616 There might be some character groups before the first character
4617 group in our list.
4618 */
4619
4/4
✓ Branch 0 taken 2027 times.
✓ Branch 1 taken 1159 times.
✓ Branch 2 taken 868 times.
✓ Branch 3 taken 1159 times.
3186 if (!last_grp && info->grp_wt_bdy.begin > START_WEIGHT_TO_REORDER) {
4620 868 my_set_weight_rec(param->wt_rec, rec_ind, START_WEIGHT_TO_REORDER,
4621 868 info->grp_wt_bdy.begin - 1, weight_start,
4622 868 weight_start + (info->grp_wt_bdy.begin - 1) -
4623 START_WEIGHT_TO_REORDER);
4624 868 weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1;
4625 868 rec_ind++;
4626 }
4627 /* Gap between 2 character groups in our list. */
4628
3/4
✓ Branch 0 taken 1159 times.
✓ Branch 1 taken 2027 times.
✓ Branch 2 taken 1159 times.
✗ Branch 3 not taken.
3186 if (last_grp && last_grp->grp_wt_bdy.end < (info->grp_wt_bdy.begin - 1)) {
4629 1159 my_set_weight_rec(param->wt_rec, rec_ind, last_grp->grp_wt_bdy.end + 1,
4630 1159 info->grp_wt_bdy.begin - 1, weight_start,
4631 1159 weight_start + (info->grp_wt_bdy.begin - 1) -
4632 1159 (last_grp->grp_wt_bdy.end + 1));
4633 1159 weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1;
4634 1159 rec_ind++;
4635 }
4636 3186 last_grp = info;
4637 3186 break;
4638 }
4639 }
4640 2027 param->wt_rec_num = rec_ind;
4641 2027 }
4642
4643 /**
4644 Prepare reorder parameters.
4645 @param cs Character set info
4646 */
4647 2752 static int my_prepare_reorder(CHARSET_INFO *cs) {
4648 /*
4649 Chinese collation's reordering is done in next_implicit() and
4650 modify_all_zh_pages(). See the comment on zh_reorder_param and
4651 change_zh_implicit().
4652 */
4653
4/4
✓ Branch 0 taken 2173 times.
✓ Branch 1 taken 579 times.
✓ Branch 2 taken 146 times.
✓ Branch 3 taken 2027 times.
2752 if (!cs->coll_param->reorder_param || cs->coll_param == &zh_coll_param)
4654 725 return 0;
4655 /*
4656 For each group of character, for example, latin characters,
4657 their weights are in a separate range. The default sequence
4658 of these groups is: Latin, Greek, Coptic, Cyrillic, and so
4659 on. Some languages want to change the default sequence. For
4660 example, Croatian wants to put Cyrillic to just behind Latin.
4661 We need to reorder the character groups and change their
4662 weight accordingly. Here we calculate the parameters needed
4663 for weight change. And the change will happen when weight
4664 returns from strnxfrm.
4665 */
4666 2027 int rec_ind = 0;
4667 2027 my_calc_char_grp_param(cs, rec_ind);
4668 2027 my_calc_char_grp_gap_param(cs, rec_ind);
4669 2027 return rec_ind;
4670 }
4671
4672 294 static void adjust_japanese_weight(CHARSET_INFO *cs, int rec_ind) {
4673 /*
4674 Per CLDR 30, Japanese collations need to reorder characters as
4675 [Latin, Kana, Han, others]. So for the original character group list:
4676 [Latin, CharA, Kana, CharB, Han, Others], it should be reordered as
4677 [Latin, Kana, Han, CharA, CharB, Others]. But my_prepare_reorder()
4678 reorders original group to be [Latin, Kana, CharA, CharB, Han, Others].
4679 This is because Han characters are different from others in that Han
4680 characters' weight is implicit and has two primary weights for each
4681 character. Other characters have only one primary weight for each (base)
4682 character. Han characters always sort bigger.
4683
4684 CLDR defines the collating order for 6355 Japanese Han characters. All
4685 of them are in [U+4E00, U+9FFF]; we give them tailored primary weights
4686 in ja_han_pages. The tailored primary weights are just after Kana,
4687 because these characters are very common. These Han characters' weight
4688 pages will be added to collation's UCA data in copy_ja_han_pages().
4689 For the other Han characters, we don't change their implicit weights,
4690 which is [FB80 - FB85, 0020, 0002][XXXX, 0000, 0000].
4691
4692 To make sure CharA and CharB's weight is greater than all Han characters,
4693 we give them weight as [FB86, 0000, 0000][origin weights]. This will be
4694 done in apply_reorder_param().
4695
4696 Because the values stored in the last wt_rec element are calculated for moving
4697 CharA to be after Kana, but we want them to be after all Han character,
4698 we reset the weight boundary here, and will change all these characters'
4699 weight in apply_reorder_param().
4700 */
4701 294 Reorder_param *param = cs->coll_param->reorder_param;
4702 294 param->wt_rec[rec_ind - 1].new_wt_bdy.begin = 0;
4703 294 param->wt_rec[rec_ind - 1].new_wt_bdy.end = 0;
4704 294 param->wt_rec[rec_ind].old_wt_bdy.begin = param->wt_rec[1].old_wt_bdy.end + 1;
4705 294 param->wt_rec[rec_ind].old_wt_bdy.end = 0x54A3;
4706 294 param->wt_rec[rec_ind].new_wt_bdy.begin = 0;
4707 294 param->wt_rec[rec_ind].new_wt_bdy.end = 0;
4708 294 param->wt_rec_num++;
4709 294 param->max_weight = 0x54A3;
4710 294 }
4711
4712 /**
4713 Prepare parametric tailoring, like reorder, etc.
4714 @param cs Character set info
4715 @param rules Collation rule list to add to.
4716 @return false Collation parameters applied successfully.
4717 true Error happened.
4718 */
4719 26377 static bool my_prepare_coll_param(CHARSET_INFO *cs, MY_COLL_RULES *rules) {
4720
4/4
✓ Branch 0 taken 8536 times.
✓ Branch 1 taken 17841 times.
✓ Branch 2 taken 5784 times.
✓ Branch 3 taken 2752 times.
26377 if (rules->uca->version != UCA_V900 || !cs->coll_param) return false;
4721
4722 2752 int rec_ind = my_prepare_reorder(cs);
4723
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2752 times.
2752 if (add_normalization_rules(cs, rules)) return true;
4724
4725
2/2
✓ Branch 0 taken 294 times.
✓ Branch 1 taken 2458 times.
2752 if (cs->coll_param == &ja_coll_param) adjust_japanese_weight(cs, rec_ind);
4726 /* Might add other parametric tailoring rules later. */
4727 2752 return false;
4728 }
4729
4730 /*
4731 This function copies an UCS2 collation from
4732 the default Unicode Collation Algorithm (UCA)
4733 weights applying tailorings, i.e. a set of
4734 alternative weights for some characters.
4735
4736 The default UCA weights are stored in uca_weight/uca_length.
4737 They consist of 256 pages, 256 character each.
4738
4739 If a page is not overwritten by tailoring rules,
4740 it is copied as is from UCA.
4741
4742 If a page contains some overwritten characters, it is
4743 allocated. Untouched characters are copied from the
4744 default weights.
4745 */
4746
4747 125654 static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) {
4748
2/2
✓ Branch 0 taken 99274 times.
✓ Branch 1 taken 26380 times.
125654 if (!cs->tailoring)
4749 99274 return false; /* Ok to add a collation without tailoring */
4750
4751 MY_COLL_RULES rules;
4752 26380 MY_UCA_INFO new_uca, *src_uca = nullptr;
4753 26380 int rc = 0;
4754 MY_UCA_INFO *src, *dst;
4755 size_t npages;
4756 bool lengths_are_temporary;
4757
4758 26380 loader->errcode = 0;
4759 26380 *loader->errarg = '\0';
4760
4761 26380 memset(&rules, 0, sizeof(rules));
4762 26380 rules.loader = loader;
4763
1/2
✓ Branch 0 taken 26380 times.
✗ Branch 1 not taken.
26380 rules.uca = cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
4764 26380 memset(&new_uca, 0, sizeof(new_uca));
4765
4766 /* Parse ICU Collation Customization expression */
4767
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 26377 times.
26380 if ((rc = my_coll_rule_parse(&rules, cs->tailoring,
4768
1/2
✓ Branch 0 taken 26380 times.
✗ Branch 1 not taken.
26380 cs->tailoring + strlen(cs->tailoring),
4769 cs->m_coll_name)))
4770 3 goto ex;
4771
4772
2/4
✓ Branch 0 taken 26377 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 26377 times.
26377 if ((rc = my_coll_check_rule_and_inherit(cs, &rules))) goto ex;
4773
4774
2/4
✓ Branch 0 taken 26377 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 26377 times.
26377 if ((rc = my_prepare_coll_param(cs, &rules))) goto ex;
4775
4776
2/2
✓ Branch 0 taken 882 times.
✓ Branch 1 taken 25495 times.
26377 if (rules.uca->version == UCA_V520) /* Unicode-5.2.0 requested */
4777 {
4778 882 src_uca = &my_uca_v520;
4779 882 cs->caseinfo = &my_unicase_unicode520;
4780
2/2
✓ Branch 0 taken 16959 times.
✓ Branch 1 taken 8536 times.
25495 } else if (rules.uca->version == UCA_V400) /* Unicode-4.0.0 requested */
4781 {
4782 16959 src_uca = &my_uca_v400;
4783
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16959 times.
16959 if (!cs->caseinfo) cs->caseinfo = &my_unicase_default;
4784 } else /* No Unicode version specified */
4785 {
4786
1/2
✓ Branch 0 taken 8536 times.
✗ Branch 1 not taken.
8536 src_uca = cs->uca ? cs->uca : &my_uca_v400;
4787
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8536 times.
8536 if (!cs->caseinfo) cs->caseinfo = &my_unicase_default;
4788 }
4789
4790 /*
4791 For UCA 9.0.0, we don't have a length page, but we still create one
4792 temporarily so that we can keep track of how much memory we need to
4793 allocate for weights.
4794 */
4795 26377 src = src_uca;
4796 26377 dst = &new_uca;
4797
4798 26377 dst->extra_ce_pri_base = cs->uca->extra_ce_pri_base;
4799 26377 dst->extra_ce_sec_base = cs->uca->extra_ce_sec_base;
4800 26377 dst->extra_ce_ter_base = cs->uca->extra_ce_ter_base;
4801
4/4
✓ Branch 0 taken 2752 times.
✓ Branch 1 taken 23625 times.
✓ Branch 2 taken 146 times.
✓ Branch 3 taken 2606 times.
26377 if (cs->coll_param && cs->coll_param == &zh_coll_param) {
4802 146 dst->extra_ce_pri_base = ZH_EXTRA_CE_PRI;
4803 }
4804
4805 26377 npages = (src->maxchar + 1) / 256;
4806
2/2
✓ Branch 0 taken 8536 times.
✓ Branch 1 taken 17841 times.
26377 if (rules.uca->version == UCA_V900) {
4807
2/4
✓ Branch 0 taken 8536 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 8536 times.
8536 if (!(src->lengths = (uchar *)(loader->mem_malloc)(npages))) goto ex;
4808
1/2
✓ Branch 0 taken 8536 times.
✗ Branch 1 not taken.
8536 synthesize_lengths_900(src->lengths, src->weights, npages);
4809 }
4810
4811 26377 lengths_are_temporary = (rules.uca->version == UCA_V900);
4812
3/4
✓ Branch 0 taken 26377 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 26375 times.
26377 if ((rc = init_weight_level(cs, loader, &rules, 0, dst, src,
4813 lengths_are_temporary)))
4814 2 goto ex;
4815
4816
2/2
✓ Branch 0 taken 8536 times.
✓ Branch 1 taken 17839 times.
26375 if (lengths_are_temporary) {
4817
1/2
✓ Branch 0 taken 8536 times.
✗ Branch 1 not taken.
8536 (loader->mem_free)(src->lengths);
4818
1/2
✓ Branch 0 taken 8536 times.
✗ Branch 1 not taken.
8536 (loader->mem_free)(dst->lengths);
4819 8536 src->lengths = nullptr;
4820 8536 dst->lengths = nullptr;
4821 }
4822
4823 26375 new_uca.version = src_uca->version;
4824
2/4
✓ Branch 0 taken 26375 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 26375 times.
26375 if (!(cs->uca = (MY_UCA_INFO *)(loader->once_alloc)(sizeof(MY_UCA_INFO)))) {
4825 rc = 1;
4826 goto ex;
4827 }
4828 26375 memset(cs->uca, 0, sizeof(MY_UCA_INFO));
4829 26375 cs->uca[0] = new_uca;
4830
4831 26380 ex:
4832
1/2
✓ Branch 0 taken 26380 times.
✗ Branch 1 not taken.
26380 (loader->mem_free)(rules.rule);
4833
3/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 26375 times.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
26380 if (rc != 0 && loader->errcode) {
4834
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
5 if (new_uca.contraction_nodes) delete new_uca.contraction_nodes;
4835
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 loader->reporter(ERROR_LEVEL, loader->errcode, loader->errarg);
4836 }
4837 26380 return rc;
4838 }
4839
4840 17878671 static void my_coll_uninit_uca(CHARSET_INFO *cs) {
4841
4/4
✓ Branch 0 taken 6660358 times.
✓ Branch 1 taken 11218313 times.
✓ Branch 2 taken 8040 times.
✓ Branch 3 taken 6652318 times.
17878671 if (cs->uca && cs->uca->contraction_nodes) {
4842
1/2
✓ Branch 0 taken 8040 times.
✗ Branch 1 not taken.
8040 delete cs->uca->contraction_nodes;
4843 8040 cs->uca->contraction_nodes = nullptr;
4844 8040 cs->state &= ~MY_CS_READY;
4845 }
4846 17878671 }
4847 /*
4848 Universal CHARSET_INFO compatible wrappers
4849 for the above internal functions.
4850 Should work for any character set.
4851 */
4852
4853 extern "C" {
4854 125654 static bool my_coll_init_uca(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) {
4855 125654 cs->pad_char = ' ';
4856 125654 cs->ctype = my_charset_utf8_unicode_ci.ctype;
4857
2/2
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 125615 times.
125654 if (!cs->caseinfo) cs->caseinfo = &my_unicase_default;
4858
2/2
✓ Branch 0 taken 16964 times.
✓ Branch 1 taken 108690 times.
125654 if (!cs->uca) cs->uca = &my_uca_v400;
4859 125654 return create_tailoring(cs, loader);
4860 }
4861
4862 80 static int my_strnncoll_any_uca(const CHARSET_INFO *cs, const uchar *s,
4863 size_t slen, const uchar *t, size_t tlen,
4864 bool t_is_prefix) {
4865
2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 26 times.
80 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4866
1/2
✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.
54 return my_strnncoll_uca<uca_scanner_any<Mb_wc_utf8mb4>, 1>(
4867 54 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4868 }
4869
4870 26 Mb_wc_through_function_pointer mb_wc(cs);
4871
1/2
✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
26 return my_strnncoll_uca<uca_scanner_any<decltype(mb_wc)>, 1>(
4872 26 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4873 }
4874
4875 273983 static int my_strnncollsp_any_uca(const CHARSET_INFO *cs, const uchar *s,
4876 size_t slen, const uchar *t, size_t tlen) {
4877
2/2
✓ Branch 0 taken 145738 times.
✓ Branch 1 taken 128245 times.
273983 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4878
1/2
✓ Branch 0 taken 145738 times.
✗ Branch 1 not taken.
145738 return my_strnncollsp_uca(cs, Mb_wc_utf8mb4(), s, slen, t, tlen);
4879 }
4880
4881 128245 Mb_wc_through_function_pointer mb_wc(cs);
4882
1/2
✓ Branch 0 taken 128320 times.
✗ Branch 1 not taken.
128241 return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen);
4883 }
4884
4885 3136 static void my_hash_sort_any_uca(const CHARSET_INFO *cs, const uchar *s,
4886 size_t slen, uint64 *n1, uint64 *n2) {
4887
2/2
✓ Branch 0 taken 600 times.
✓ Branch 1 taken 2536 times.
3136 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4888 600 my_hash_sort_uca(cs, Mb_wc_utf8mb4(), s, slen, n1, n2);
4889 } else {
4890 2536 Mb_wc_through_function_pointer mb_wc(cs);
4891
1/2
✓ Branch 0 taken 2536 times.
✗ Branch 1 not taken.
2536 my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2);
4892 }
4893 3136 }
4894
4895 83038229 static size_t my_strnxfrm_any_uca(const CHARSET_INFO *cs, uchar *dst,
4896 size_t dstlen, uint num_codepoints,
4897 const uchar *src, size_t srclen, uint flags) {
4898
2/2
✓ Branch 0 taken 26750590 times.
✓ Branch 1 taken 56287639 times.
83038229 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4899
1/2
✓ Branch 0 taken 26750590 times.
✗ Branch 1 not taken.
26750590 return my_strnxfrm_uca(cs, Mb_wc_utf8mb4(), dst, dstlen, num_codepoints,
4900 26750590 src, srclen, flags);
4901 }
4902
4903 56287639 Mb_wc_through_function_pointer mb_wc(cs);
4904
1/2
✓ Branch 0 taken 56287639 times.
✗ Branch 1 not taken.
56287639 return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen,
4905 56287639 flags);
4906 }
4907
4908 5068609466 static int my_strnncoll_uca_900(const CHARSET_INFO *cs, const uchar *s,
4909 size_t slen, const uchar *t, size_t tlen,
4910 bool t_is_prefix) {
4911
1/2
✓ Branch 0 taken 5068621631 times.
✗ Branch 1 not taken.
5068609466 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4912
4/5
✓ Branch 0 taken 5068611774 times.
✓ Branch 1 taken 4591 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2891 times.
✓ Branch 4 taken 2375 times.
5068621631 switch (cs->levels_for_compare) {
4913 5068611774 case 1:
4914
1/2
✓ Branch 0 taken 5068579639 times.
✗ Branch 1 not taken.
5068611774 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 1>, 1>(
4915 5068579639 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4916 4591 case 2:
4917
1/2
✓ Branch 0 taken 4591 times.
✗ Branch 1 not taken.
4591 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 2>, 2>(
4918 4591 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4919 default:
4920 assert(false);
4921 2891 case 3:
4922
1/2
✓ Branch 0 taken 2891 times.
✗ Branch 1 not taken.
2891 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 3>, 3>(
4923 2891 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4924 2375 case 4:
4925
1/2
✓ Branch 0 taken 62 times.
✗ Branch 1 not taken.
2375 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 4>, 4>(
4926 62 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4927 }
4928 }
4929
4930 Mb_wc_through_function_pointer mb_wc(cs);
4931 switch (cs->levels_for_compare) {
4932 case 1:
4933 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 1>, 1>(
4934 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4935 case 2:
4936 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 2>, 2>(
4937 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4938 default:
4939 assert(false);
4940 case 3:
4941 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 3>, 3>(
4942 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4943 case 4:
4944 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 4>, 4>(
4945 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4946 }
4947 }
4948
4949 675419125 static int my_strnncollsp_uca_900(const CHARSET_INFO *cs, const uchar *s,
4950 size_t slen, const uchar *t, size_t tlen) {
4951 // We are a NO PAD collation, so this is identical to strnncoll.
4952 675419125 return my_strnncoll_uca_900(cs, s, slen, t, tlen, false);
4953 }
4954
4955 } // extern "C"
4956
4957 template <class Mb_wc, int LEVELS_FOR_COMPARE>
4958 188356004 static void my_hash_sort_uca_900_tmpl(const CHARSET_INFO *cs, const Mb_wc mb_wc,
4959 const uchar *s, size_t slen, uint64 *n1) {
4960 188356004 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE> scanner(mb_wc, cs, s, slen);
4961
4962 /*
4963 A variation of the FNV-1a hash. The differences between this and
4964 standard FNV-1a as described in literature are:
4965
4966 - We work naturally on 16-bit weights, so we XOR in the entire weight
4967 instead of hashing byte-by-byte. (This is effectively a speed/quality
4968 tradeoff, as it will reduce avalanche.)
4969 - We use the n1 seed by XOR-ing it onto the offset basis; FNV-1a as
4970 typically described does not use a seed. This should be safe, since
4971 there's nothing magical about the offset basis; it's just the FNV-1a
4972 hash of some human-readable text.
4973
4974 This is nowhere near a perfect hash function; it has suboptimal avalanche
4975 characteristics, and it is not multicollision resistant. In particular,
4976 it fails many SMHasher tests, mostly for bias (collision tests are fine).
4977 However, it is of much better quality than the home-grown hash used
4978 for other collations (which fails _all_ SMHasher tests), while being
4979 much faster.
4980
4981 We ignore the n2 seed entirely, since we don't need it. The caller is
4982 responsible for doing hash folding at the end; we can't do that.
4983
4984 See http://isthe.com/chongo/tech/comp/fnv/#FNV-param for constants.
4985 */
4986
4987 188356976 uint64 h = *n1;
4988 188356976 h ^= 14695981039346656037ULL;
4989
4990
1/2
✓ Branch 0 taken 94178598 times.
✗ Branch 1 not taken.
188356976 scanner.for_each_weight(
4991 1664531737 [&](int s_res, bool) -> bool {
4992 1664531737 h ^= s_res;
4993 1664531737 h *= 1099511628211ULL;
4994 1664531737 return true;
4995 },
4996 384134149 [](int) { return true; });
4997
4998 188357946 *n1 = h;
4999 }
5000
5001 extern "C" {
5002
5003 94178102 static void my_hash_sort_uca_900(const CHARSET_INFO *cs, const uchar *s,
5004 size_t slen, uint64 *n1, uint64 *) {
5005
1/2
✓ Branch 0 taken 94178271 times.
✗ Branch 1 not taken.
94178102 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
5006
3/5
✓ Branch 0 taken 94178590 times.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 55 times.
✗ Branch 4 not taken.
94178271 switch (cs->levels_for_compare) {
5007 94178590 case 1:
5008
1/2
✓ Branch 0 taken 94179613 times.
✗ Branch 1 not taken.
94178590 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 1>(cs, Mb_wc_utf8mb4(),
5009 94179613 s, slen, n1);
5010 12 case 2:
5011
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 2>(cs, Mb_wc_utf8mb4(),
5012 12 s, slen, n1);
5013 default:
5014 assert(false);
5015 55 case 3:
5016
1/2
✓ Branch 0 taken 55 times.
✗ Branch 1 not taken.
55 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 3>(cs, Mb_wc_utf8mb4(),
5017 55 s, slen, n1);
5018 1 case 4:
5019
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
1 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 4>(cs, Mb_wc_utf8mb4(),
5020 36 s, slen, n1);
5021 }
5022 }
5023
5024 Mb_wc_through_function_pointer mb_wc(cs);
5025 switch (cs->levels_for_compare) {
5026 case 1:
5027 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 1>(cs, mb_wc, s, slen,
5028 n1);
5029 case 2:
5030 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 2>(cs, mb_wc, s, slen,
5031 n1);
5032 default:
5033 assert(false);
5034 case 3:
5035 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 3>(cs, mb_wc, s, slen,
5036 n1);
5037 case 4:
5038 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 4>(cs, mb_wc, s, slen,
5039 n1);
5040 }
5041 }
5042
5043 } // extern "C"
5044
5045 /*
5046 Check if a constant can be propagated
5047
5048 Currently we don't check the constant itself, and decide not to propagate
5049 a constant just if the collation itself allows expansions or contractions.
5050 */
5051 195847 bool my_propagate_uca_900(const CHARSET_INFO *cs,
5052 const uchar *str [[maybe_unused]],
5053 size_t length [[maybe_unused]]) {
5054 195847 return !my_uca_have_contractions(cs->uca);
5055 }
5056
5057 template <class Mb_wc, int LEVELS_FOR_COMPARE>
5058 199974950 static size_t my_strnxfrm_uca_900_tmpl(const CHARSET_INFO *cs,
5059 const Mb_wc mb_wc, uchar *dst,
5060 size_t dstlen, const uchar *src,
5061 size_t srclen, uint flags) {
5062 199974950 uchar *d0 = dst;
5063 199974950 uchar *dst_end = dst + dstlen;
5064 199974950 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE> scanner(mb_wc, cs, src, srclen);
5065
5066
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 99987454 times.
199974950 assert((dstlen % 2) == 0);
5067
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 99987454 times.
199974908 if ((dstlen % 2) == 1) {
5068 // Emergency workaround for optimized mode.
5069 --dst_end;
5070 }
5071
5072
2/2
✓ Branch 0 taken 99987372 times.
✓ Branch 1 taken 82 times.
199974908 if (dst != dst_end) {
5073
2/2
✓ Branch 0 taken 34242307 times.
✓ Branch 1 taken 65745065 times.
399948876 scanner.for_each_weight(
5074 4333552181 [&dst, dst_end](int s_res,
5075 bool is_level_separator [[maybe_unused]]) -> bool {
5076
4/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 6762951 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 208009279 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 4473394 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1225271786 times.
1444517410 assert(is_level_separator == (s_res == 0));
5077
1/4
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1225271786 times.
1225271786 if (LEVELS_FOR_COMPARE == 1) assert(!is_level_separator);
5078
5079 1444517410 dst = store16be(dst, s_res);
5080 1444517361 return (dst < dst_end);
5081 },
5082 289299679 [&dst, dst_end](int num_weights) {
5083 289299679 return (dst < dst_end - num_weights * 2);
5084 });
5085 }
5086
5087
2/2
✓ Branch 0 taken 35189 times.
✓ Branch 1 taken 99951959 times.
199974296 if (flags & MY_STRXFRM_PAD_TO_MAXLEN) {
5088 70378 memset(dst, 0, dst_end - dst);
5089 70378 dst = dst_end;
5090 }
5091
5092 199974296 return dst - d0;
5093 }
5094
5095 extern "C" {
5096
5097 99987387 static size_t my_strnxfrm_uca_900(const CHARSET_INFO *cs, uchar *dst,
5098 size_t dstlen,
5099 uint num_codepoints [[maybe_unused]],
5100 const uchar *src, size_t srclen, uint flags) {
5101
1/2
✓ Branch 0 taken 99987438 times.
✗ Branch 1 not taken.
99987387 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
5102
4/5
✓ Branch 0 taken 63217051 times.
✓ Branch 1 taken 1116523 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 34539590 times.
✓ Branch 4 taken 1114274 times.
99987438 switch (cs->levels_for_compare) {
5103 63217051 case 1:
5104 63217051 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 1>(
5105 63216760 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5106 1116523 case 2:
5107 1116523 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 2>(
5108 1116523 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5109 default:
5110 assert(false);
5111 34539590 case 3:
5112 34539590 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 3>(
5113 34539590 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5114 1114274 case 4:
5115 1114274 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 4>(
5116 1114274 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5117 }
5118 } else {
5119 Mb_wc_through_function_pointer mb_wc(cs);
5120 switch (cs->levels_for_compare) {
5121 case 1:
5122 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 1>(
5123 cs, mb_wc, dst, dstlen, src, srclen, flags);
5124 case 2:
5125 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 2>(
5126 cs, mb_wc, dst, dstlen, src, srclen, flags);
5127 default:
5128 assert(false);
5129 case 3:
5130 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 3>(
5131 cs, mb_wc, dst, dstlen, src, srclen, flags);
5132 case 4:
5133 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 4>(
5134 cs, mb_wc, dst, dstlen, src, srclen, flags);
5135 }
5136 }
5137 }
5138
5139 526139 static size_t my_strnxfrmlen_uca_900(const CHARSET_INFO *cs, size_t len) {
5140 /*
5141 The character with the most weights is U+FDFA ARABIC LIGATURE SALLALLAHOU
5142 ALAYHE WASALLAM, which we truncate to eight weights. This is the most we
5143 can get in regular DUCET.
5144
5145 In addition, collations with reorderings can add an extra weight per weight,
5146 which currently only happens on the primary level. We simulate this by
5147 simply adding an extra level.
5148
5149 One could conceivably have tailorings yielding expansions having more than
5150 this, but we don't currently, and mostly, tailorings are about contractions
5151 and adding single weights anyway.
5152
5153 We also need to add room for one level separator between each level.
5154 */
5155 // We really ought to have len % 4 == 0, but not all calling code conforms.
5156 526139 const size_t num_codepoints = (len + 3) / 4;
5157 526139 const size_t max_num_weights_per_level = num_codepoints * 8;
5158 526139 size_t max_num_weights = max_num_weights_per_level * cs->levels_for_compare;
5159
4/4
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 525993 times.
✓ Branch 2 taken 98 times.
✓ Branch 3 taken 48 times.
526139 if (cs->coll_param && cs->coll_param->reorder_param) {
5160 98 max_num_weights += max_num_weights_per_level;
5161 }
5162 526139 return (max_num_weights + (cs->levels_for_compare - 1)) * sizeof(uint16_t);
5163 }
5164
5165 } // extern "C"
5166
5167 /*
5168 UCS2 optimized CHARSET_INFO compatible wrappers.
5169 */
5170 extern "C" {
5171 12 static int my_strnncoll_ucs2_uca(const CHARSET_INFO *cs, const uchar *s,
5172 size_t slen, const uchar *t, size_t tlen,
5173 bool t_is_prefix) {
5174 12 Mb_wc_through_function_pointer mb_wc(cs);
5175
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 return my_strnncoll_uca<uca_scanner_any<decltype(mb_wc)>, 1>(
5176 24 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
5177 }
5178
5179 12139 static int my_strnncollsp_ucs2_uca(const CHARSET_INFO *cs, const uchar *s,
5180 size_t slen, const uchar *t, size_t tlen) {
5181 12139 Mb_wc_through_function_pointer mb_wc(cs);
5182
1/2
✓ Branch 0 taken 12139 times.
✗ Branch 1 not taken.
24278 return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen);
5183 }
5184
5185 412 static void my_hash_sort_ucs2_uca(const CHARSET_INFO *cs, const uchar *s,
5186 size_t slen, uint64 *n1, uint64 *n2) {
5187 412 Mb_wc_through_function_pointer mb_wc(cs);
5188
1/2
✓ Branch 0 taken 412 times.
✗ Branch 1 not taken.
412 my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2);
5189 412 }
5190
5191 1721990 static size_t my_strnxfrm_ucs2_uca(const CHARSET_INFO *cs, uchar *dst,
5192 size_t dstlen, uint num_codepoints,
5193 const uchar *src, size_t srclen,
5194 uint flags) {
5195 1721990 Mb_wc_through_function_pointer mb_wc(cs);
5196
1/2
✓ Branch 0 taken 1721990 times.
✗ Branch 1 not taken.
1721990 return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen,
5197 3443980 flags);
5198 }
5199 } // extern "C"
5200
5201 MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = {
5202 my_coll_init_uca, /* init */
5203 my_coll_uninit_uca,
5204 my_strnncoll_ucs2_uca,
5205 my_strnncollsp_ucs2_uca,
5206 my_strnxfrm_ucs2_uca,
5207 my_strnxfrmlen_simple,
5208 my_like_range_generic,
5209 my_wildcmp_uca,
5210 nullptr,
5211 my_instr_mb,
5212 my_hash_sort_ucs2_uca,
5213 my_propagate_complex};
5214
5215 CHARSET_INFO my_charset_ucs2_unicode_ci = {
5216 128,
5217 0,
5218 0, /* number */
5219 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5220 "ucs2", /* cs name */
5221 "ucs2_unicode_ci", /* m_coll_name */
5222 "UCS-2 Unicode", /* comment */
5223 "", /* tailoring */
5224 nullptr, /* coll_param */
5225 nullptr, /* ctype */
5226 nullptr, /* to_lower */
5227 nullptr, /* to_upper */
5228 nullptr, /* sort_order */
5229 nullptr, /* uca */
5230 nullptr, /* tab_to_uni */
5231 nullptr, /* tab_from_uni */
5232 &my_unicase_default, /* caseinfo */
5233 nullptr, /* state_map */
5234 nullptr, /* ident_map */
5235 8, /* strxfrm_multiply */
5236 1, /* caseup_multiply */
5237 1, /* casedn_multiply */
5238 2, /* mbminlen */
5239 2, /* mbmaxlen */
5240 1, /* mbmaxlenlen */
5241 9, /* min_sort_char */
5242 0xFFFF, /* max_sort_char */
5243 ' ', /* pad char */
5244 false, /* escape_with_backslash_is_dangerous */
5245 1, /* levels_for_compare */
5246 &my_charset_ucs2_handler,
5247 &my_collation_ucs2_uca_handler,
5248 PAD_SPACE};
5249
5250 CHARSET_INFO my_charset_ucs2_icelandic_uca_ci = {
5251 129,
5252 0,
5253 0, /* number */
5254 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5255 "ucs2", /* cs name */
5256 "ucs2_icelandic_ci", /* m_coll_name */
5257 "UCS-2 Unicode", /* comment */
5258 icelandic, /* tailoring */
5259 nullptr, /* coll_param */
5260 nullptr, /* ctype */
5261 nullptr, /* to_lower */
5262 nullptr, /* to_upper */
5263 nullptr, /* sort_order */
5264 nullptr, /* uca */
5265 nullptr, /* tab_to_uni */
5266 nullptr, /* tab_from_uni */
5267 &my_unicase_default, /* caseinfo */
5268 nullptr, /* state_map */
5269 nullptr, /* ident_map */
5270 8, /* strxfrm_multiply */
5271 1, /* caseup_multiply */
5272 1, /* casedn_multiply */
5273 2, /* mbminlen */
5274 2, /* mbmaxlen */
5275 1, /* mbmaxlenlen */
5276 9, /* min_sort_char */
5277 0xFFFF, /* max_sort_char */
5278 ' ', /* pad char */
5279 false, /* escape_with_backslash_is_dangerous */
5280 1, /* levels_for_compare */
5281 &my_charset_ucs2_handler,
5282 &my_collation_ucs2_uca_handler,
5283 PAD_SPACE};
5284
5285 CHARSET_INFO my_charset_ucs2_latvian_uca_ci = {
5286 130,
5287 0,
5288 0, /* number */
5289 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5290 "ucs2", /* cs name */
5291 "ucs2_latvian_ci", /* m_coll_name */
5292 "UCS-2 Unicode", /* comment */
5293 latvian, /* tailoring */
5294 nullptr, /* coll_param */
5295 nullptr, /* ctype */
5296 nullptr, /* to_lower */
5297 nullptr, /* to_upper */
5298 nullptr, /* sort_order */
5299 nullptr, /* uca */
5300 nullptr, /* tab_to_uni */
5301 nullptr, /* tab_from_uni */
5302 &my_unicase_default, /* caseinfo */
5303 nullptr, /* state_map */
5304 nullptr, /* ident_map */
5305 8, /* strxfrm_multiply */
5306 1, /* caseup_multiply */
5307 1, /* casedn_multiply */
5308 2, /* mbminlen */
5309 2, /* mbmaxlen */
5310 1, /* mbmaxlenlen */
5311 9, /* min_sort_char */
5312 0xFFFF, /* max_sort_char */
5313 ' ', /* pad char */
5314 false, /* escape_with_backslash_is_dangerous */
5315 1, /* levels_for_compare */
5316 &my_charset_ucs2_handler,
5317 &my_collation_ucs2_uca_handler,
5318 PAD_SPACE};
5319
5320 CHARSET_INFO my_charset_ucs2_romanian_uca_ci = {
5321 131,
5322 0,
5323 0, /* number */
5324 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5325 "ucs2", /* cs name */
5326 "ucs2_romanian_ci", /* m_coll_name */
5327 "UCS-2 Unicode", /* comment */
5328 romanian, /* tailoring */
5329 nullptr, /* coll_param */
5330 nullptr, /* ctype */
5331 nullptr, /* to_lower */
5332 nullptr, /* to_upper */
5333 nullptr, /* sort_order */
5334 nullptr, /* uca */
5335 nullptr, /* tab_to_uni */
5336 nullptr, /* tab_from_uni */
5337 &my_unicase_default, /* caseinfo */
5338 nullptr, /* state_map */
5339 nullptr, /* ident_map */
5340 8, /* strxfrm_multiply */
5341 1, /* caseup_multiply */
5342 1, /* casedn_multiply */
5343 2, /* mbminlen */
5344 2, /* mbmaxlen */
5345 1, /* mbmaxlenlen */
5346 9, /* min_sort_char */
5347 0xFFFF, /* max_sort_char */
5348 ' ', /* pad char */
5349 false, /* escape_with_backslash_is_dangerous */
5350 1, /* levels_for_compare */
5351 &my_charset_ucs2_handler,
5352 &my_collation_ucs2_uca_handler,
5353 PAD_SPACE};
5354
5355 CHARSET_INFO my_charset_ucs2_slovenian_uca_ci = {
5356 132,
5357 0,
5358 0, /* number */
5359 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5360 "ucs2", /* cs name */
5361 "ucs2_slovenian_ci", /* m_coll_name */
5362 "UCS-2 Unicode", /* comment */
5363 slovenian, /* tailoring */
5364 nullptr, /* coll_param */
5365 nullptr, /* ctype */
5366 nullptr, /* to_lower */
5367 nullptr, /* to_upper */
5368 nullptr, /* sort_order */
5369 nullptr, /* uca */
5370 nullptr, /* tab_to_uni */
5371 nullptr, /* tab_from_uni */
5372 &my_unicase_default, /* caseinfo */
5373 nullptr, /* state_map */
5374 nullptr, /* ident_map */
5375 8, /* strxfrm_multiply */
5376 1, /* caseup_multiply */
5377 1, /* casedn_multiply */
5378 2, /* mbminlen */
5379 2, /* mbmaxlen */
5380 1, /* mbmaxlenlen */
5381 9, /* min_sort_char */
5382 0xFFFF, /* max_sort_char */
5383 ' ', /* pad char */
5384 false, /* escape_with_backslash_is_dangerous */
5385 1, /* levels_for_compare */
5386 &my_charset_ucs2_handler,
5387 &my_collation_ucs2_uca_handler,
5388 PAD_SPACE};
5389
5390 CHARSET_INFO my_charset_ucs2_polish_uca_ci = {
5391 133,
5392 0,
5393 0, /* number */
5394 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5395 "ucs2", /* cs name */
5396 "ucs2_polish_ci", /* m_coll_name */
5397 "UCS-2 Unicode", /* comment */
5398 polish, /* tailoring */
5399 nullptr, /* coll_param */
5400 nullptr, /* ctype */
5401 nullptr, /* to_lower */
5402 nullptr, /* to_upper */
5403 nullptr, /* sort_order */
5404 nullptr, /* uca */
5405 nullptr, /* tab_to_uni */
5406 nullptr, /* tab_from_uni */
5407 &my_unicase_default, /* caseinfo */
5408 nullptr, /* state_map */
5409 nullptr, /* ident_map */
5410 8, /* strxfrm_multiply */
5411 1, /* caseup_multiply */
5412 1, /* casedn_multiply */
5413 2, /* mbminlen */
5414 2, /* mbmaxlen */
5415 1, /* mbmaxlenlen */
5416 9, /* min_sort_char */
5417 0xFFFF, /* max_sort_char */
5418 ' ', /* pad char */
5419 false, /* escape_with_backslash_is_dangerous */
5420 1, /* levels_for_compare */
5421 &my_charset_ucs2_handler,
5422 &my_collation_ucs2_uca_handler,
5423 PAD_SPACE};
5424
5425 CHARSET_INFO my_charset_ucs2_estonian_uca_ci = {
5426 134,
5427 0,
5428 0, /* number */
5429 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5430 "ucs2", /* cs name */
5431 "ucs2_estonian_ci", /* m_coll_name */
5432 "UCS-2 Unicode", /* comment */
5433 estonian, /* tailoring */
5434 nullptr, /* coll_param */
5435 nullptr, /* ctype */
5436 nullptr, /* to_lower */
5437 nullptr, /* to_upper */
5438 nullptr, /* sort_order */
5439 nullptr, /* uca */
5440 nullptr, /* tab_to_uni */
5441 nullptr, /* tab_from_uni */
5442 &my_unicase_default, /* caseinfo */
5443 nullptr, /* state_map */
5444 nullptr, /* ident_map */
5445 8, /* strxfrm_multiply */
5446 1, /* caseup_multiply */
5447 1, /* casedn_multiply */
5448 2, /* mbminlen */
5449 2, /* mbmaxlen */
5450 1, /* mbmaxlenlen */
5451 9, /* min_sort_char */
5452 0xFFFF, /* max_sort_char */
5453 ' ', /* pad char */
5454 false, /* escape_with_backslash_is_dangerous */
5455 1, /* levels_for_compare */
5456 &my_charset_ucs2_handler,
5457 &my_collation_ucs2_uca_handler,
5458 PAD_SPACE};
5459
5460 CHARSET_INFO my_charset_ucs2_spanish_uca_ci = {
5461 135,
5462 0,
5463 0, /* number */
5464 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5465 "ucs2", /* cs name */
5466 "ucs2_spanish_ci", /* m_coll_name */
5467 "UCS-2 Unicode", /* comment */
5468 spanish, /* tailoring */
5469 nullptr, /* coll_param */
5470 nullptr, /* ctype */
5471 nullptr, /* to_lower */
5472 nullptr, /* to_upper */
5473 nullptr, /* sort_order */
5474 nullptr, /* uca */
5475 nullptr, /* tab_to_uni */
5476 nullptr, /* tab_from_uni */
5477 &my_unicase_default, /* caseinfo */
5478 nullptr, /* state_map */
5479 nullptr, /* ident_map */
5480 8, /* strxfrm_multiply */
5481 1, /* caseup_multiply */
5482 1, /* casedn_multiply */
5483 2, /* mbminlen */
5484 2, /* mbmaxlen */
5485 1, /* mbmaxlenlen */
5486 9, /* min_sort_char */
5487 0xFFFF, /* max_sort_char */
5488 ' ', /* pad char */
5489 false, /* escape_with_backslash_is_dangerous */
5490 1, /* levels_for_compare */
5491 &my_charset_ucs2_handler,
5492 &my_collation_ucs2_uca_handler,
5493 PAD_SPACE};
5494
5495 CHARSET_INFO my_charset_ucs2_swedish_uca_ci = {
5496 136,
5497 0,
5498 0, /* number */
5499 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5500 "ucs2", /* cs name */
5501 "ucs2_swedish_ci", /* m_coll_name */
5502 "UCS-2 Unicode", /* comment */
5503 swedish, /* tailoring */
5504 nullptr, /* coll_param */
5505 nullptr, /* ctype */
5506 nullptr, /* to_lower */
5507 nullptr, /* to_upper */
5508 nullptr, /* sort_order */
5509 nullptr, /* uca */
5510 nullptr, /* tab_to_uni */
5511 nullptr, /* tab_from_uni */
5512 &my_unicase_default, /* caseinfo */
5513 nullptr, /* state_map */
5514 nullptr, /* ident_map */
5515 8, /* strxfrm_multiply */
5516 1, /* caseup_multiply */
5517 1, /* casedn_multiply */
5518 2, /* mbminlen */
5519 2, /* mbmaxlen */
5520 1, /* mbmaxlenlen */
5521 9, /* min_sort_char */
5522 0xFFFF, /* max_sort_char */
5523 ' ', /* pad char */
5524 false, /* escape_with_backslash_is_dangerous */
5525 1, /* levels_for_compare */
5526 &my_charset_ucs2_handler,
5527 &my_collation_ucs2_uca_handler,
5528 PAD_SPACE};
5529
5530 CHARSET_INFO my_charset_ucs2_turkish_uca_ci = {
5531 137,
5532 0,
5533 0, /* number */
5534 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5535 "ucs2", /* cs name */
5536 "ucs2_turkish_ci", /* m_coll_name */
5537 "UCS-2 Unicode", /* comment */
5538 turkish, /* tailoring */
5539 nullptr, /* coll_param */
5540 nullptr, /* ctype */
5541 nullptr, /* to_lower */
5542 nullptr, /* to_upper */
5543 nullptr, /* sort_order */
5544 nullptr, /* uca */
5545 nullptr, /* tab_to_uni */
5546 nullptr, /* tab_from_uni */
5547 &my_unicase_turkish, /* caseinfo */
5548 nullptr, /* state_map */
5549 nullptr, /* ident_map */
5550 8, /* strxfrm_multiply */
5551 1, /* caseup_multiply */
5552 1, /* casedn_multiply */
5553 2, /* mbminlen */
5554 2, /* mbmaxlen */
5555 1, /* mbmaxlenlen */
5556 9, /* min_sort_char */
5557 0xFFFF, /* max_sort_char */
5558 ' ', /* pad char */
5559 false, /* escape_with_backslash_is_dangerous */
5560 1, /* levels_for_compare */
5561 &my_charset_ucs2_handler,
5562 &my_collation_ucs2_uca_handler,
5563 PAD_SPACE};
5564
5565 CHARSET_INFO my_charset_ucs2_czech_uca_ci = {
5566 138,
5567 0,
5568 0, /* number */
5569 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5570 "ucs2", /* cs name */
5571 "ucs2_czech_ci", /* m_coll_name */
5572 "UCS-2 Unicode", /* comment */
5573 czech, /* tailoring */
5574 nullptr, /* coll_param */
5575 nullptr, /* ctype */
5576 nullptr, /* to_lower */
5577 nullptr, /* to_upper */
5578 nullptr, /* sort_order */
5579 nullptr, /* uca */
5580 nullptr, /* tab_to_uni */
5581 nullptr, /* tab_from_uni */
5582 &my_unicase_default, /* caseinfo */
5583 nullptr, /* state_map */
5584 nullptr, /* ident_map */
5585 8, /* strxfrm_multiply */
5586 1, /* caseup_multiply */
5587 1, /* casedn_multiply */
5588 2, /* mbminlen */
5589 2, /* mbmaxlen */
5590 1, /* mbmaxlenlen */
5591 9, /* min_sort_char */
5592 0xFFFF, /* max_sort_char */
5593 ' ', /* pad char */
5594 false, /* escape_with_backslash_is_dangerous */
5595 1, /* levels_for_compare */
5596 &my_charset_ucs2_handler,
5597 &my_collation_ucs2_uca_handler,
5598 PAD_SPACE};
5599
5600 CHARSET_INFO my_charset_ucs2_danish_uca_ci = {
5601 139,
5602 0,
5603 0, /* number */
5604 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5605 "ucs2", /* cs name */
5606 "ucs2_danish_ci", /* m_coll_name */
5607 "UCS-2 Unicode", /* comment */
5608 danish, /* tailoring */
5609 nullptr, /* coll_param */
5610 nullptr, /* ctype */
5611 nullptr, /* to_lower */
5612 nullptr, /* to_upper */
5613 nullptr, /* sort_order */
5614 nullptr, /* uca */
5615 nullptr, /* tab_to_uni */
5616 nullptr, /* tab_from_uni */
5617 &my_unicase_default, /* caseinfo */
5618 nullptr, /* state_map */
5619 nullptr, /* ident_map */
5620 8, /* strxfrm_multiply */
5621 1, /* caseup_multiply */
5622 1, /* casedn_multiply */
5623 2, /* mbminlen */
5624 2, /* mbmaxlen */
5625 1, /* mbmaxlenlen */
5626 9, /* min_sort_char */
5627 0xFFFF, /* max_sort_char */
5628 ' ', /* pad char */
5629 false, /* escape_with_backslash_is_dangerous */
5630 1, /* levels_for_compare */
5631 &my_charset_ucs2_handler,
5632 &my_collation_ucs2_uca_handler,
5633 PAD_SPACE};
5634
5635 CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci = {
5636 140,
5637 0,
5638 0, /* number */
5639 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5640 "ucs2", /* cs name */
5641 "ucs2_lithuanian_ci", /* m_coll_name */
5642 "UCS-2 Unicode", /* comment */
5643 lithuanian, /* tailoring */
5644 nullptr, /* coll_param */
5645 nullptr, /* ctype */
5646 nullptr, /* to_lower */
5647 nullptr, /* to_upper */
5648 nullptr, /* sort_order */
5649 nullptr, /* uca */
5650 nullptr, /* tab_to_uni */
5651 nullptr, /* tab_from_uni */
5652 &my_unicase_default, /* caseinfo */
5653 nullptr, /* state_map */
5654 nullptr, /* ident_map */
5655 8, /* strxfrm_multiply */
5656 1, /* caseup_multiply */
5657 1, /* casedn_multiply */
5658 2, /* mbminlen */
5659 2, /* mbmaxlen */
5660 1, /* mbmaxlenlen */
5661 9, /* min_sort_char */
5662 0xFFFF, /* max_sort_char */
5663 ' ', /* pad char */
5664 false, /* escape_with_backslash_is_dangerous */
5665 1, /* levels_for_compare */
5666 &my_charset_ucs2_handler,
5667 &my_collation_ucs2_uca_handler,
5668 PAD_SPACE};
5669
5670 CHARSET_INFO my_charset_ucs2_slovak_uca_ci = {
5671 141,
5672 0,
5673 0, /* number */
5674 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5675 "ucs2", /* cs name */
5676 "ucs2_slovak_ci", /* m_coll_name */
5677 "UCS-2 Unicode", /* comment */
5678 slovak, /* tailoring */
5679 nullptr, /* coll_param */
5680 nullptr, /* ctype */
5681 nullptr, /* to_lower */
5682 nullptr, /* to_upper */
5683 nullptr, /* sort_order */
5684 nullptr, /* uca */
5685 nullptr, /* tab_to_uni */
5686 nullptr, /* tab_from_uni */
5687 &my_unicase_default, /* caseinfo */
5688 nullptr, /* state_map */
5689 nullptr, /* ident_map */
5690 8, /* strxfrm_multiply */
5691 1, /* caseup_multiply */
5692 1, /* casedn_multiply */
5693 2, /* mbminlen */
5694 2, /* mbmaxlen */
5695 1, /* mbmaxlenlen */
5696 9, /* min_sort_char */
5697 0xFFFF, /* max_sort_char */
5698 ' ', /* pad char */
5699 false, /* escape_with_backslash_is_dangerous */
5700 1, /* levels_for_compare */
5701 &my_charset_ucs2_handler,
5702 &my_collation_ucs2_uca_handler,
5703 PAD_SPACE};
5704
5705 CHARSET_INFO my_charset_ucs2_spanish2_uca_ci = {
5706 142,
5707 0,
5708 0, /* number */
5709 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5710 "ucs2", /* cs name */
5711 "ucs2_spanish2_ci", /* m_coll_name */
5712 "UCS-2 Unicode", /* comment */
5713 spanish2, /* tailoring */
5714 nullptr, /* coll_param */
5715 nullptr, /* ctype */
5716 nullptr, /* to_lower */
5717 nullptr, /* to_upper */
5718 nullptr, /* sort_order */
5719 nullptr, /* uca */
5720 nullptr, /* tab_to_uni */
5721 nullptr, /* tab_from_uni */
5722 &my_unicase_default, /* caseinfo */
5723 nullptr, /* state_map */
5724 nullptr, /* ident_map */
5725 8, /* strxfrm_multiply */
5726 1, /* caseup_multiply */
5727 1, /* casedn_multiply */
5728 2, /* mbminlen */
5729 2, /* mbmaxlen */
5730 1, /* mbmaxlenlen */
5731 9, /* min_sort_char */
5732 0xFFFF, /* max_sort_char */
5733 ' ', /* pad char */
5734 false, /* escape_with_backslash_is_dangerous */
5735 1, /* levels_for_compare */
5736 &my_charset_ucs2_handler,
5737 &my_collation_ucs2_uca_handler,
5738 PAD_SPACE};
5739
5740 CHARSET_INFO my_charset_ucs2_roman_uca_ci = {
5741 143,
5742 0,
5743 0, /* number */
5744 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5745 "ucs2", /* cs name */
5746 "ucs2_roman_ci", /* m_coll_name */
5747 "UCS-2 Unicode", /* comment */
5748 roman, /* tailoring */
5749 nullptr, /* coll_param */
5750 nullptr, /* ctype */
5751 nullptr, /* to_lower */
5752 nullptr, /* to_upper */
5753 nullptr, /* sort_order */
5754 nullptr, /* uca */
5755 nullptr, /* tab_to_uni */
5756 nullptr, /* tab_from_uni */
5757 &my_unicase_default, /* caseinfo */
5758 nullptr, /* state_map */
5759 nullptr, /* ident_map */
5760 8, /* strxfrm_multiply */
5761 1, /* caseup_multiply */
5762 1, /* casedn_multiply */
5763 2, /* mbminlen */
5764 2, /* mbmaxlen */
5765 1, /* mbmaxlenlen */
5766 9, /* min_sort_char */
5767 0xFFFF, /* max_sort_char */
5768 ' ', /* pad char */
5769 false, /* escape_with_backslash_is_dangerous */
5770 1, /* levels_for_compare */
5771 &my_charset_ucs2_handler,
5772 &my_collation_ucs2_uca_handler,
5773 PAD_SPACE};
5774
5775 CHARSET_INFO my_charset_ucs2_persian_uca_ci = {
5776 144,
5777 0,
5778 0, /* number */
5779 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5780 "ucs2", /* cs name */
5781 "ucs2_persian_ci", /* m_coll_name */
5782 "UCS-2 Unicode", /* comment */
5783 persian, /* tailoring */
5784 nullptr, /* coll_param */
5785 nullptr, /* ctype */
5786 nullptr, /* to_lower */
5787 nullptr, /* to_upper */
5788 nullptr, /* sort_order */
5789 nullptr, /* uca */
5790 nullptr, /* tab_to_uni */
5791 nullptr, /* tab_from_uni */
5792 &my_unicase_default, /* caseinfo */
5793 nullptr, /* state_map */
5794 nullptr, /* ident_map */
5795 8, /* strxfrm_multiply */
5796 1, /* caseup_multiply */
5797 1, /* casedn_multiply */
5798 2, /* mbminlen */
5799 2, /* mbmaxlen */
5800 1, /* mbmaxlenlen */
5801 9, /* min_sort_char */
5802 0xFFFF, /* max_sort_char */
5803 ' ', /* pad char */
5804 false, /* escape_with_backslash_is_dangerous */
5805 1, /* levels_for_compare */
5806 &my_charset_ucs2_handler,
5807 &my_collation_ucs2_uca_handler,
5808 PAD_SPACE};
5809
5810 CHARSET_INFO my_charset_ucs2_esperanto_uca_ci = {
5811 145,
5812 0,
5813 0, /* number */
5814 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5815 "ucs2", /* cs name */
5816 "ucs2_esperanto_ci", /* m_coll_name */
5817 "UCS-2 Unicode", /* comment */
5818 esperanto, /* tailoring */
5819 nullptr, /* coll_param */
5820 nullptr, /* ctype */
5821 nullptr, /* to_lower */
5822 nullptr, /* to_upper */
5823 nullptr, /* sort_order */
5824 nullptr, /* uca */
5825 nullptr, /* tab_to_uni */
5826 nullptr, /* tab_from_uni */
5827 &my_unicase_default, /* caseinfo */
5828 nullptr, /* state_map */
5829 nullptr, /* ident_map */
5830 8, /* strxfrm_multiply */
5831 1, /* caseup_multiply */
5832 1, /* casedn_multiply */
5833 2, /* mbminlen */
5834 2, /* mbmaxlen */
5835 1, /* mbmaxlenlen */
5836 9, /* min_sort_char */
5837 0xFFFF, /* max_sort_char */
5838 ' ', /* pad char */
5839 false, /* escape_with_backslash_is_dangerous */
5840 1, /* levels_for_compare */
5841 &my_charset_ucs2_handler,
5842 &my_collation_ucs2_uca_handler,
5843 PAD_SPACE};
5844
5845 CHARSET_INFO my_charset_ucs2_hungarian_uca_ci = {
5846 146,
5847 0,
5848 0, /* number */
5849 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5850 "ucs2", /* cs name */
5851 "ucs2_hungarian_ci", /* m_coll_name */
5852 "UCS-2 Unicode", /* comment */
5853 hungarian, /* tailoring */
5854 nullptr, /* coll_param */
5855 nullptr, /* ctype */
5856 nullptr, /* to_lower */
5857 nullptr, /* to_upper */
5858 nullptr, /* sort_order */
5859 nullptr, /* uca */
5860 nullptr, /* tab_to_uni */
5861 nullptr, /* tab_from_uni */
5862 &my_unicase_default, /* caseinfo */
5863 nullptr, /* state_map */
5864 nullptr, /* ident_map */
5865 8, /* strxfrm_multiply */
5866 1, /* caseup_multiply */
5867 1, /* casedn_multiply */
5868 2, /* mbminlen */
5869 2, /* mbmaxlen */
5870 1, /* mbmaxlenlen */
5871 9, /* min_sort_char */
5872 0xFFFF, /* max_sort_char */
5873 ' ', /* pad char */
5874 false, /* escape_with_backslash_is_dangerous */
5875 1, /* levels_for_compare */
5876 &my_charset_ucs2_handler,
5877 &my_collation_ucs2_uca_handler,
5878 PAD_SPACE};
5879
5880 CHARSET_INFO my_charset_ucs2_sinhala_uca_ci = {
5881 147,
5882 0,
5883 0, /* number */
5884 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5885 "ucs2", /* csname */
5886 "ucs2_sinhala_ci", /* m_coll_name */
5887 "UCS-2 Unicode", /* comment */
5888 sinhala, /* tailoring */
5889 nullptr, /* coll_param */
5890 nullptr, /* ctype */
5891 nullptr, /* to_lower */
5892 nullptr, /* to_upper */
5893 nullptr, /* sort_order */
5894 nullptr, /* uca */
5895 nullptr, /* tab_to_uni */
5896 nullptr, /* tab_from_uni */
5897 &my_unicase_default, /* caseinfo */
5898 nullptr, /* state_map */
5899 nullptr, /* ident_map */
5900 8, /* strxfrm_multiply */
5901 1, /* caseup_multiply */
5902 1, /* casedn_multiply */
5903 2, /* mbminlen */
5904 2, /* mbmaxlen */
5905 1, /* mbmaxlenlen */
5906 9, /* min_sort_char */
5907 0xFFFF, /* max_sort_char */
5908 ' ', /* pad char */
5909 false, /* escape_with_backslash_is_dangerous */
5910 1, /* levels_for_compare */
5911 &my_charset_ucs2_handler,
5912 &my_collation_ucs2_uca_handler,
5913 PAD_SPACE};
5914
5915 CHARSET_INFO my_charset_ucs2_german2_uca_ci = {
5916 148,
5917 0,
5918 0, /* number */
5919 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5920 "ucs2", /* csname */
5921 "ucs2_german2_ci", /* m_coll_name */
5922 "UCS-2 Unicode", /* comment */
5923 german2, /* tailoring */
5924 nullptr, /* coll_param */
5925 nullptr, /* ctype */
5926 nullptr, /* to_lower */
5927 nullptr, /* to_upper */
5928 nullptr, /* sort_order */
5929 nullptr, /* uca */
5930 nullptr, /* tab_to_uni */
5931 nullptr, /* tab_from_uni */
5932 &my_unicase_default, /* caseinfo */
5933 nullptr, /* state_map */
5934 nullptr, /* ident_map */
5935 8, /* strxfrm_multiply */
5936 1, /* caseup_multiply */
5937 1, /* casedn_multiply */
5938 2, /* mbminlen */
5939 2, /* mbmaxlen */
5940 1, /* mbmaxlenlen */
5941 9, /* min_sort_char */
5942 0xFFFF, /* max_sort_char */
5943 ' ', /* pad char */
5944 false, /* escape_with_backslash_is_dangerous */
5945 1, /* levels_for_compare */
5946 &my_charset_ucs2_handler,
5947 &my_collation_ucs2_uca_handler,
5948 PAD_SPACE};
5949
5950 CHARSET_INFO my_charset_ucs2_croatian_uca_ci = {
5951 149,
5952 0,
5953 0, /* number */
5954 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5955 "ucs2", /* csname */
5956 "ucs2_croatian_ci", /* m_coll_name */
5957 "UCS-2 Unicode", /* comment */
5958 croatian, /* tailoring */
5959 nullptr, /* coll_param */
5960 nullptr, /* ctype */
5961 nullptr, /* to_lower */
5962 nullptr, /* to_upper */
5963 nullptr, /* sort_order */
5964 nullptr, /* uca */
5965 nullptr, /* tab_to_uni */
5966 nullptr, /* tab_from_uni */
5967 &my_unicase_default, /* caseinfo */
5968 nullptr, /* state_map */
5969 nullptr, /* ident_map */
5970 8, /* strxfrm_multiply */
5971 1, /* caseup_multiply */
5972 1, /* casedn_multiply */
5973 2, /* mbminlen */
5974 2, /* mbmaxlen */
5975 1, /* mbmaxlenlen */
5976 9, /* min_sort_char */
5977 0xFFFF, /* max_sort_char */
5978 ' ', /* pad char */
5979 false, /* escape_with_backslash_is_dangerous */
5980 1, /* levels_for_compare */
5981 &my_charset_ucs2_handler,
5982 &my_collation_ucs2_uca_handler,
5983 PAD_SPACE};
5984
5985 CHARSET_INFO my_charset_ucs2_unicode_520_ci = {
5986 150,
5987 0,
5988 0, /* number */
5989 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5990 "ucs2", /* cs name */
5991 "ucs2_unicode_520_ci", /* m_coll_name */
5992 "UCS-2 Unicode", /* comment */
5993 "", /* tailoring */
5994 nullptr, /* coll_param */
5995 nullptr, /* ctype */
5996 nullptr, /* to_lower */
5997 nullptr, /* to_upper */
5998 nullptr, /* sort_order */
5999 &my_uca_v520, /* uca */
6000 nullptr, /* tab_to_uni */
6001 nullptr, /* tab_from_uni */
6002 &my_unicase_unicode520, /* caseinfo */
6003 nullptr, /* state_map */
6004 nullptr, /* ident_map */
6005 8, /* strxfrm_multiply */
6006 1, /* caseup_multiply */
6007 1, /* casedn_multiply */
6008 2, /* mbminlen */
6009 2, /* mbmaxlen */
6010 1, /* mbmaxlenlen */
6011 9, /* min_sort_char */
6012 0xFFFF, /* max_sort_char */
6013 ' ', /* pad char */
6014 false, /* escape_with_backslash_is_dangerous */
6015 1, /* levels_for_compare */
6016 &my_charset_ucs2_handler,
6017 &my_collation_ucs2_uca_handler,
6018 PAD_SPACE};
6019
6020 CHARSET_INFO my_charset_ucs2_vietnamese_ci = {
6021 151,
6022 0,
6023 0, /* number */
6024 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
6025 "ucs2", /* csname */
6026 "ucs2_vietnamese_ci", /* m_coll_name */
6027 "UCS-2 Unicode", /* comment */
6028 vietnamese, /* tailoring */
6029 nullptr, /* coll_param */
6030 nullptr, /* ctype */
6031 nullptr, /* to_lower */
6032 nullptr, /* to_upper */
6033 nullptr, /* sort_order */
6034 nullptr, /* uca */
6035 nullptr, /* tab_to_uni */
6036 nullptr, /* tab_from_uni */
6037 &my_unicase_default, /* caseinfo */
6038 nullptr, /* state_map */
6039 nullptr, /* ident_map */
6040 8, /* strxfrm_multiply */
6041 1, /* caseup_multiply */
6042 1, /* casedn_multiply */
6043 2, /* mbminlen */
6044 2, /* mbmaxlen */
6045 1, /* mbmaxlenlen */
6046 9, /* min_sort_char */
6047 0xFFFF, /* max_sort_char */
6048 ' ', /* pad char */
6049 false, /* escape_with_backslash_is_dangerous */
6050 1, /* levels_for_compare */
6051 &my_charset_ucs2_handler,
6052 &my_collation_ucs2_uca_handler,
6053 PAD_SPACE};
6054
6055 MY_COLLATION_HANDLER my_collation_any_uca_handler = {
6056 my_coll_init_uca, /* init */
6057 my_coll_uninit_uca, my_strnncoll_any_uca, my_strnncollsp_any_uca,
6058 my_strnxfrm_any_uca, my_strnxfrmlen_simple, my_like_range_mb,
6059 my_wildcmp_uca, my_strcasecmp_uca, my_instr_mb,
6060 my_hash_sort_any_uca, my_propagate_complex};
6061
/*
  Collation function table built from the *_uca_900 routines. It shares
  my_coll_init_uca, my_coll_uninit_uca, my_like_range_mb, my_wildcmp_uca,
  my_strcasecmp_uca and my_instr_mb with my_collation_any_uca_handler, and
  substitutes the uca_900 variants for comparison, strnxfrm, strnxfrmlen,
  hashing and propagation. Entries are positional.
*/
6062 MY_COLLATION_HANDLER my_collation_uca_900_handler = {
6063 my_coll_init_uca, /* init */
6064 my_coll_uninit_uca, my_strnncoll_uca_900, my_strnncollsp_uca_900,
6065 my_strnxfrm_uca_900, my_strnxfrmlen_uca_900, my_like_range_mb,
6066 my_wildcmp_uca, my_strcasecmp_uca, my_instr_mb,
6067 my_hash_sort_uca_900, my_propagate_uca_900};
6068
6069 /*
6070 We consider bytes with a code greater than 127 to be letters.
6071 This guarantees that word boundaries work correctly with regular
6072 expressions. Note that there is no need to mark byte 255 as a
6073 letter: it is an illegal byte in UTF-8.

NOTE(review): the table holds 257 entries (16 rows of 16 plus a final 0),
and the run of 3s ends one entry before the end, so it appears to be
indexed by (byte value + 1) with a leading 0 entry -- confirm against the
ctype lookup macros. The meaning of the individual bit values is defined
elsewhere.
6074 */
6075 static const uchar ctype_utf8[] = {
6076 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32,
6077 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
6078 32, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
6079 16, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16,
6080 16, 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1,
6081 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16,
6082 16, 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2,
6083 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16,
6084 32, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6085 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6086 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6087 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6088 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6089 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6090 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6091 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6092 0};
6093
/* Character-set handler used by the utf8mb3 descriptors below; only declared here. */
6094 extern MY_CHARSET_HANDLER my_charset_utf8_handler;
6095
/* Flag combination shared by every utf8mb3 UCA collation descriptor below. */
6096 #define MY_CS_UTF8MB3_UCA_FLAGS \
6097 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE)
6098
/*
  Descriptor for the "utf8mb3_unicode_ci" collation of the "utf8mb3" character
  set. The tailoring string is empty; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.

  The comment string is "UTF-8 Unicode", matching every other utf8mb3/utf8mb4
  descriptor in this file ("UCS-2 Unicode" belongs to the ucs2 descriptors).
*/
6099 CHARSET_INFO my_charset_utf8_unicode_ci = {
6100 192,
6101 0,
6102 0, /* number */
6103 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6104 "utf8mb3", /* cs name */
6105 "utf8mb3_unicode_ci", /* m_coll_name */
6106 "UTF-8 Unicode", /* comment */
6107 "", /* tailoring */
6108 nullptr, /* coll_param */
6109 ctype_utf8, /* ctype */
6110 nullptr, /* to_lower */
6111 nullptr, /* to_upper */
6112 nullptr, /* sort_order */
6113 nullptr, /* uca */
6114 nullptr, /* tab_to_uni */
6115 nullptr, /* tab_from_uni */
6116 &my_unicase_default, /* caseinfo */
6117 nullptr, /* state_map */
6118 nullptr, /* ident_map */
6119 8, /* strxfrm_multiply */
6120 1, /* caseup_multiply */
6121 1, /* casedn_multiply */
6122 1, /* mbminlen */
6123 3, /* mbmaxlen */
6124 1, /* mbmaxlenlen */
6125 9, /* min_sort_char */
6126 0xFFFF, /* max_sort_char */
6127 ' ', /* pad char */
6128 false, /* escape_with_backslash_is_dangerous */
6129 1, /* levels_for_compare */
6130 &my_charset_utf8_handler,
6131 &my_collation_any_uca_handler,
6132 PAD_SPACE};
6133
/*
  Descriptor for the "utf8mb3_icelandic_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `icelandic`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6134 CHARSET_INFO my_charset_utf8_icelandic_uca_ci = {
6135 193,
6136 0,
6137 0, /* number */
6138 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6139 "utf8mb3", /* cs name */
6140 "utf8mb3_icelandic_ci", /* m_coll_name */
6141 "UTF-8 Unicode", /* comment */
6142 icelandic, /* tailoring */
6143 nullptr, /* coll_param */
6144 ctype_utf8, /* ctype */
6145 nullptr, /* to_lower */
6146 nullptr, /* to_upper */
6147 nullptr, /* sort_order */
6148 nullptr, /* uca */
6149 nullptr, /* tab_to_uni */
6150 nullptr, /* tab_from_uni */
6151 &my_unicase_default, /* caseinfo */
6152 nullptr, /* state_map */
6153 nullptr, /* ident_map */
6154 8, /* strxfrm_multiply */
6155 1, /* caseup_multiply */
6156 1, /* casedn_multiply */
6157 1, /* mbminlen */
6158 3, /* mbmaxlen */
6159 1, /* mbmaxlenlen */
6160 9, /* min_sort_char */
6161 0xFFFF, /* max_sort_char */
6162 ' ', /* pad char */
6163 false, /* escape_with_backslash_is_dangerous */
6164 1, /* levels_for_compare */
6165 &my_charset_utf8_handler,
6166 &my_collation_any_uca_handler,
6167 PAD_SPACE};
6168
/*
  Descriptor for the "utf8mb3_latvian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `latvian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6169 CHARSET_INFO my_charset_utf8_latvian_uca_ci = {
6170 194,
6171 0,
6172 0, /* number */
6173 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6174 "utf8mb3", /* cs name */
6175 "utf8mb3_latvian_ci", /* m_coll_name */
6176 "UTF-8 Unicode", /* comment */
6177 latvian, /* tailoring */
6178 nullptr, /* coll_param */
6179 ctype_utf8, /* ctype */
6180 nullptr, /* to_lower */
6181 nullptr, /* to_upper */
6182 nullptr, /* sort_order */
6183 nullptr, /* uca */
6184 nullptr, /* tab_to_uni */
6185 nullptr, /* tab_from_uni */
6186 &my_unicase_default, /* caseinfo */
6187 nullptr, /* state_map */
6188 nullptr, /* ident_map */
6189 8, /* strxfrm_multiply */
6190 1, /* caseup_multiply */
6191 1, /* casedn_multiply */
6192 1, /* mbminlen */
6193 3, /* mbmaxlen */
6194 1, /* mbmaxlenlen */
6195 9, /* min_sort_char */
6196 0xFFFF, /* max_sort_char */
6197 ' ', /* pad char */
6198 false, /* escape_with_backslash_is_dangerous */
6199 1, /* levels_for_compare */
6200 &my_charset_utf8_handler,
6201 &my_collation_any_uca_handler,
6202 PAD_SPACE};
6203
/*
  Descriptor for the "utf8mb3_romanian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `romanian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6204 CHARSET_INFO my_charset_utf8_romanian_uca_ci = {
6205 195,
6206 0,
6207 0, /* number */
6208 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6209 "utf8mb3", /* cs name */
6210 "utf8mb3_romanian_ci", /* m_coll_name */
6211 "UTF-8 Unicode", /* comment */
6212 romanian, /* tailoring */
6213 nullptr, /* coll_param */
6214 ctype_utf8, /* ctype */
6215 nullptr, /* to_lower */
6216 nullptr, /* to_upper */
6217 nullptr, /* sort_order */
6218 nullptr, /* uca */
6219 nullptr, /* tab_to_uni */
6220 nullptr, /* tab_from_uni */
6221 &my_unicase_default, /* caseinfo */
6222 nullptr, /* state_map */
6223 nullptr, /* ident_map */
6224 8, /* strxfrm_multiply */
6225 1, /* caseup_multiply */
6226 1, /* casedn_multiply */
6227 1, /* mbminlen */
6228 3, /* mbmaxlen */
6229 1, /* mbmaxlenlen */
6230 9, /* min_sort_char */
6231 0xFFFF, /* max_sort_char */
6232 ' ', /* pad char */
6233 false, /* escape_with_backslash_is_dangerous */
6234 1, /* levels_for_compare */
6235 &my_charset_utf8_handler,
6236 &my_collation_any_uca_handler,
6237 PAD_SPACE};
6238
/*
  Descriptor for the "utf8mb3_slovenian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `slovenian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6239 CHARSET_INFO my_charset_utf8_slovenian_uca_ci = {
6240 196,
6241 0,
6242 0, /* number */
6243 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6244 "utf8mb3", /* cs name */
6245 "utf8mb3_slovenian_ci", /* m_coll_name */
6246 "UTF-8 Unicode", /* comment */
6247 slovenian, /* tailoring */
6248 nullptr, /* coll_param */
6249 ctype_utf8, /* ctype */
6250 nullptr, /* to_lower */
6251 nullptr, /* to_upper */
6252 nullptr, /* sort_order */
6253 nullptr, /* uca */
6254 nullptr, /* tab_to_uni */
6255 nullptr, /* tab_from_uni */
6256 &my_unicase_default, /* caseinfo */
6257 nullptr, /* state_map */
6258 nullptr, /* ident_map */
6259 8, /* strxfrm_multiply */
6260 1, /* caseup_multiply */
6261 1, /* casedn_multiply */
6262 1, /* mbminlen */
6263 3, /* mbmaxlen */
6264 1, /* mbmaxlenlen */
6265 9, /* min_sort_char */
6266 0xFFFF, /* max_sort_char */
6267 ' ', /* pad char */
6268 false, /* escape_with_backslash_is_dangerous */
6269 1, /* levels_for_compare */
6270 &my_charset_utf8_handler,
6271 &my_collation_any_uca_handler,
6272 PAD_SPACE};
6273
/*
  Descriptor for the "utf8mb3_polish_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `polish`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6274 CHARSET_INFO my_charset_utf8_polish_uca_ci = {
6275 197,
6276 0,
6277 0, /* number */
6278 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6279 "utf8mb3", /* cs name */
6280 "utf8mb3_polish_ci", /* m_coll_name */
6281 "UTF-8 Unicode", /* comment */
6282 polish, /* tailoring */
6283 nullptr, /* coll_param */
6284 ctype_utf8, /* ctype */
6285 nullptr, /* to_lower */
6286 nullptr, /* to_upper */
6287 nullptr, /* sort_order */
6288 nullptr, /* uca */
6289 nullptr, /* tab_to_uni */
6290 nullptr, /* tab_from_uni */
6291 &my_unicase_default, /* caseinfo */
6292 nullptr, /* state_map */
6293 nullptr, /* ident_map */
6294 8, /* strxfrm_multiply */
6295 1, /* caseup_multiply */
6296 1, /* casedn_multiply */
6297 1, /* mbminlen */
6298 3, /* mbmaxlen */
6299 1, /* mbmaxlenlen */
6300 9, /* min_sort_char */
6301 0xFFFF, /* max_sort_char */
6302 ' ', /* pad char */
6303 false, /* escape_with_backslash_is_dangerous */
6304 1, /* levels_for_compare */
6305 &my_charset_utf8_handler,
6306 &my_collation_any_uca_handler,
6307 PAD_SPACE};
6308
/*
  Descriptor for the "utf8mb3_estonian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `estonian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6309 CHARSET_INFO my_charset_utf8_estonian_uca_ci = {
6310 198,
6311 0,
6312 0, /* number */
6313 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6314 "utf8mb3", /* cs name */
6315 "utf8mb3_estonian_ci", /* m_coll_name */
6316 "UTF-8 Unicode", /* comment */
6317 estonian, /* tailoring */
6318 nullptr, /* coll_param */
6319 ctype_utf8, /* ctype */
6320 nullptr, /* to_lower */
6321 nullptr, /* to_upper */
6322 nullptr, /* sort_order */
6323 nullptr, /* uca */
6324 nullptr, /* tab_to_uni */
6325 nullptr, /* tab_from_uni */
6326 &my_unicase_default, /* caseinfo */
6327 nullptr, /* state_map */
6328 nullptr, /* ident_map */
6329 8, /* strxfrm_multiply */
6330 1, /* caseup_multiply */
6331 1, /* casedn_multiply */
6332 1, /* mbminlen */
6333 3, /* mbmaxlen */
6334 1, /* mbmaxlenlen */
6335 9, /* min_sort_char */
6336 0xFFFF, /* max_sort_char */
6337 ' ', /* pad char */
6338 false, /* escape_with_backslash_is_dangerous */
6339 1, /* levels_for_compare */
6340 &my_charset_utf8_handler,
6341 &my_collation_any_uca_handler,
6342 PAD_SPACE};
6343
/*
  Descriptor for the "utf8mb3_spanish_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `spanish`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6344 CHARSET_INFO my_charset_utf8_spanish_uca_ci = {
6345 199,
6346 0,
6347 0, /* number */
6348 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6349 "utf8mb3", /* cs name */
6350 "utf8mb3_spanish_ci", /* m_coll_name */
6351 "UTF-8 Unicode", /* comment */
6352 spanish, /* tailoring */
6353 nullptr, /* coll_param */
6354 ctype_utf8, /* ctype */
6355 nullptr, /* to_lower */
6356 nullptr, /* to_upper */
6357 nullptr, /* sort_order */
6358 nullptr, /* uca */
6359 nullptr, /* tab_to_uni */
6360 nullptr, /* tab_from_uni */
6361 &my_unicase_default, /* caseinfo */
6362 nullptr, /* state_map */
6363 nullptr, /* ident_map */
6364 8, /* strxfrm_multiply */
6365 1, /* caseup_multiply */
6366 1, /* casedn_multiply */
6367 1, /* mbminlen */
6368 3, /* mbmaxlen */
6369 1, /* mbmaxlenlen */
6370 9, /* min_sort_char */
6371 0xFFFF, /* max_sort_char */
6372 ' ', /* pad char */
6373 false, /* escape_with_backslash_is_dangerous */
6374 1, /* levels_for_compare */
6375 &my_charset_utf8_handler,
6376 &my_collation_any_uca_handler,
6377 PAD_SPACE};
6378
/*
  Descriptor for the "utf8mb3_swedish_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `swedish`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6379 CHARSET_INFO my_charset_utf8_swedish_uca_ci = {
6380 200,
6381 0,
6382 0, /* number */
6383 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6384 "utf8mb3", /* cs name */
6385 "utf8mb3_swedish_ci", /* m_coll_name */
6386 "UTF-8 Unicode", /* comment */
6387 swedish, /* tailoring */
6388 nullptr, /* coll_param */
6389 ctype_utf8, /* ctype */
6390 nullptr, /* to_lower */
6391 nullptr, /* to_upper */
6392 nullptr, /* sort_order */
6393 nullptr, /* uca */
6394 nullptr, /* tab_to_uni */
6395 nullptr, /* tab_from_uni */
6396 &my_unicase_default, /* caseinfo */
6397 nullptr, /* state_map */
6398 nullptr, /* ident_map */
6399 8, /* strxfrm_multiply */
6400 1, /* caseup_multiply */
6401 1, /* casedn_multiply */
6402 1, /* mbminlen */
6403 3, /* mbmaxlen */
6404 1, /* mbmaxlenlen */
6405 9, /* min_sort_char */
6406 0xFFFF, /* max_sort_char */
6407 ' ', /* pad char */
6408 false, /* escape_with_backslash_is_dangerous */
6409 1, /* levels_for_compare */
6410 &my_charset_utf8_handler,
6411 &my_collation_any_uca_handler,
6412 PAD_SPACE};
6413
/*
  Descriptor for the "utf8mb3_turkish_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `turkish`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.

  Unlike its sibling utf8mb3 descriptors, this one uses my_unicase_turkish as
  caseinfo and sets caseup_multiply/casedn_multiply to 2 instead of 1.
*/
6414 CHARSET_INFO my_charset_utf8_turkish_uca_ci = {
6415 201,
6416 0,
6417 0, /* number */
6418 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6419 "utf8mb3", /* cs name */
6420 "utf8mb3_turkish_ci", /* m_coll_name */
6421 "UTF-8 Unicode", /* comment */
6422 turkish, /* tailoring */
6423 nullptr, /* coll_param */
6424 ctype_utf8, /* ctype */
6425 nullptr, /* to_lower */
6426 nullptr, /* to_upper */
6427 nullptr, /* sort_order */
6428 nullptr, /* uca */
6429 nullptr, /* tab_to_uni */
6430 nullptr, /* tab_from_uni */
6431 &my_unicase_turkish, /* caseinfo */
6432 nullptr, /* state_map */
6433 nullptr, /* ident_map */
6434 8, /* strxfrm_multiply */
6435 2, /* caseup_multiply */
6436 2, /* casedn_multiply */
6437 1, /* mbminlen */
6438 3, /* mbmaxlen */
6439 1, /* mbmaxlenlen */
6440 9, /* min_sort_char */
6441 0xFFFF, /* max_sort_char */
6442 ' ', /* pad char */
6443 false, /* escape_with_backslash_is_dangerous */
6444 1, /* levels_for_compare */
6445 &my_charset_utf8_handler,
6446 &my_collation_any_uca_handler,
6447 PAD_SPACE};
6448
/*
  Descriptor for the "utf8mb3_czech_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `czech`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6449 CHARSET_INFO my_charset_utf8_czech_uca_ci = {
6450 202,
6451 0,
6452 0, /* number */
6453 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6454 "utf8mb3", /* cs name */
6455 "utf8mb3_czech_ci", /* m_coll_name */
6456 "UTF-8 Unicode", /* comment */
6457 czech, /* tailoring */
6458 nullptr, /* coll_param */
6459 ctype_utf8, /* ctype */
6460 nullptr, /* to_lower */
6461 nullptr, /* to_upper */
6462 nullptr, /* sort_order */
6463 nullptr, /* uca */
6464 nullptr, /* tab_to_uni */
6465 nullptr, /* tab_from_uni */
6466 &my_unicase_default, /* caseinfo */
6467 nullptr, /* state_map */
6468 nullptr, /* ident_map */
6469 8, /* strxfrm_multiply */
6470 1, /* caseup_multiply */
6471 1, /* casedn_multiply */
6472 1, /* mbminlen */
6473 3, /* mbmaxlen */
6474 1, /* mbmaxlenlen */
6475 9, /* min_sort_char */
6476 0xFFFF, /* max_sort_char */
6477 ' ', /* pad char */
6478 false, /* escape_with_backslash_is_dangerous */
6479 1, /* levels_for_compare */
6480 &my_charset_utf8_handler,
6481 &my_collation_any_uca_handler,
6482 PAD_SPACE};
6483
/*
  Descriptor for the "utf8mb3_danish_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `danish`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6484 CHARSET_INFO my_charset_utf8_danish_uca_ci = {
6485 203,
6486 0,
6487 0, /* number */
6488 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6489 "utf8mb3", /* cs name */
6490 "utf8mb3_danish_ci", /* m_coll_name */
6491 "UTF-8 Unicode", /* comment */
6492 danish, /* tailoring */
6493 nullptr, /* coll_param */
6494 ctype_utf8, /* ctype */
6495 nullptr, /* to_lower */
6496 nullptr, /* to_upper */
6497 nullptr, /* sort_order */
6498 nullptr, /* uca */
6499 nullptr, /* tab_to_uni */
6500 nullptr, /* tab_from_uni */
6501 &my_unicase_default, /* caseinfo */
6502 nullptr, /* state_map */
6503 nullptr, /* ident_map */
6504 8, /* strxfrm_multiply */
6505 1, /* caseup_multiply */
6506 1, /* casedn_multiply */
6507 1, /* mbminlen */
6508 3, /* mbmaxlen */
6509 1, /* mbmaxlenlen */
6510 9, /* min_sort_char */
6511 0xFFFF, /* max_sort_char */
6512 ' ', /* pad char */
6513 false, /* escape_with_backslash_is_dangerous */
6514 1, /* levels_for_compare */
6515 &my_charset_utf8_handler,
6516 &my_collation_any_uca_handler,
6517 PAD_SPACE};
6518
/*
  Descriptor for the "utf8mb3_lithuanian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `lithuanian`; operations are delegated
  to my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6519 CHARSET_INFO my_charset_utf8_lithuanian_uca_ci = {
6520 204,
6521 0,
6522 0, /* number */
6523 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6524 "utf8mb3", /* cs name */
6525 "utf8mb3_lithuanian_ci", /* m_coll_name */
6526 "UTF-8 Unicode", /* comment */
6527 lithuanian, /* tailoring */
6528 nullptr, /* coll_param */
6529 ctype_utf8, /* ctype */
6530 nullptr, /* to_lower */
6531 nullptr, /* to_upper */
6532 nullptr, /* sort_order */
6533 nullptr, /* uca */
6534 nullptr, /* tab_to_uni */
6535 nullptr, /* tab_from_uni */
6536 &my_unicase_default, /* caseinfo */
6537 nullptr, /* state_map */
6538 nullptr, /* ident_map */
6539 8, /* strxfrm_multiply */
6540 1, /* caseup_multiply */
6541 1, /* casedn_multiply */
6542 1, /* mbminlen */
6543 3, /* mbmaxlen */
6544 1, /* mbmaxlenlen */
6545 9, /* min_sort_char */
6546 0xFFFF, /* max_sort_char */
6547 ' ', /* pad char */
6548 false, /* escape_with_backslash_is_dangerous */
6549 1, /* levels_for_compare */
6550 &my_charset_utf8_handler,
6551 &my_collation_any_uca_handler,
6552 PAD_SPACE};
6553
/*
  Descriptor for the "utf8mb3_slovak_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `slovak`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6554 CHARSET_INFO my_charset_utf8_slovak_uca_ci = {
6555 205,
6556 0,
6557 0, /* number */
6558 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6559 "utf8mb3", /* cs name */
6560 "utf8mb3_slovak_ci", /* m_coll_name */
6561 "UTF-8 Unicode", /* comment */
6562 slovak, /* tailoring */
6563 nullptr, /* coll_param */
6564 ctype_utf8, /* ctype */
6565 nullptr, /* to_lower */
6566 nullptr, /* to_upper */
6567 nullptr, /* sort_order */
6568 nullptr, /* uca */
6569 nullptr, /* tab_to_uni */
6570 nullptr, /* tab_from_uni */
6571 &my_unicase_default, /* caseinfo */
6572 nullptr, /* state_map */
6573 nullptr, /* ident_map */
6574 8, /* strxfrm_multiply */
6575 1, /* caseup_multiply */
6576 1, /* casedn_multiply */
6577 1, /* mbminlen */
6578 3, /* mbmaxlen */
6579 1, /* mbmaxlenlen */
6580 9, /* min_sort_char */
6581 0xFFFF, /* max_sort_char */
6582 ' ', /* pad char */
6583 false, /* escape_with_backslash_is_dangerous */
6584 1, /* levels_for_compare */
6585 &my_charset_utf8_handler,
6586 &my_collation_any_uca_handler,
6587 PAD_SPACE};
6588
/*
  Descriptor for the "utf8mb3_spanish2_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `spanish2`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6589 CHARSET_INFO my_charset_utf8_spanish2_uca_ci = {
6590 206,
6591 0,
6592 0, /* number */
6593 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6594 "utf8mb3", /* cs name */
6595 "utf8mb3_spanish2_ci", /* m_coll_name */
6596 "UTF-8 Unicode", /* comment */
6597 spanish2, /* tailoring */
6598 nullptr, /* coll_param */
6599 ctype_utf8, /* ctype */
6600 nullptr, /* to_lower */
6601 nullptr, /* to_upper */
6602 nullptr, /* sort_order */
6603 nullptr, /* uca */
6604 nullptr, /* tab_to_uni */
6605 nullptr, /* tab_from_uni */
6606 &my_unicase_default, /* caseinfo */
6607 nullptr, /* state_map */
6608 nullptr, /* ident_map */
6609 8, /* strxfrm_multiply */
6610 1, /* caseup_multiply */
6611 1, /* casedn_multiply */
6612 1, /* mbminlen */
6613 3, /* mbmaxlen */
6614 1, /* mbmaxlenlen */
6615 9, /* min_sort_char */
6616 0xFFFF, /* max_sort_char */
6617 ' ', /* pad char */
6618 false, /* escape_with_backslash_is_dangerous */
6619 1, /* levels_for_compare */
6620 &my_charset_utf8_handler,
6621 &my_collation_any_uca_handler,
6622 PAD_SPACE};
6623
/*
  Descriptor for the "utf8mb3_roman_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `roman`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6624 CHARSET_INFO my_charset_utf8_roman_uca_ci = {
6625 207,
6626 0,
6627 0, /* number */
6628 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6629 "utf8mb3", /* cs name */
6630 "utf8mb3_roman_ci", /* m_coll_name */
6631 "UTF-8 Unicode", /* comment */
6632 roman, /* tailoring */
6633 nullptr, /* coll_param */
6634 ctype_utf8, /* ctype */
6635 nullptr, /* to_lower */
6636 nullptr, /* to_upper */
6637 nullptr, /* sort_order */
6638 nullptr, /* uca */
6639 nullptr, /* tab_to_uni */
6640 nullptr, /* tab_from_uni */
6641 &my_unicase_default, /* caseinfo */
6642 nullptr, /* state_map */
6643 nullptr, /* ident_map */
6644 8, /* strxfrm_multiply */
6645 1, /* caseup_multiply */
6646 1, /* casedn_multiply */
6647 1, /* mbminlen */
6648 3, /* mbmaxlen */
6649 1, /* mbmaxlenlen */
6650 9, /* min_sort_char */
6651 0xFFFF, /* max_sort_char */
6652 ' ', /* pad char */
6653 false, /* escape_with_backslash_is_dangerous */
6654 1, /* levels_for_compare */
6655 &my_charset_utf8_handler,
6656 &my_collation_any_uca_handler,
6657 PAD_SPACE};
6658
/*
  Descriptor for the "utf8mb3_persian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `persian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6659 CHARSET_INFO my_charset_utf8_persian_uca_ci = {
6660 208,
6661 0,
6662 0, /* number */
6663 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6664 "utf8mb3", /* cs name */
6665 "utf8mb3_persian_ci", /* m_coll_name */
6666 "UTF-8 Unicode", /* comment */
6667 persian, /* tailoring */
6668 nullptr, /* coll_param */
6669 ctype_utf8, /* ctype */
6670 nullptr, /* to_lower */
6671 nullptr, /* to_upper */
6672 nullptr, /* sort_order */
6673 nullptr, /* uca */
6674 nullptr, /* tab_to_uni */
6675 nullptr, /* tab_from_uni */
6676 &my_unicase_default, /* caseinfo */
6677 nullptr, /* state_map */
6678 nullptr, /* ident_map */
6679 8, /* strxfrm_multiply */
6680 1, /* caseup_multiply */
6681 1, /* casedn_multiply */
6682 1, /* mbminlen */
6683 3, /* mbmaxlen */
6684 1, /* mbmaxlenlen */
6685 9, /* min_sort_char */
6686 0xFFFF, /* max_sort_char */
6687 ' ', /* pad char */
6688 false, /* escape_with_backslash_is_dangerous */
6689 1, /* levels_for_compare */
6690 &my_charset_utf8_handler,
6691 &my_collation_any_uca_handler,
6692 PAD_SPACE};
6693
/*
  Descriptor for the "utf8mb3_esperanto_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `esperanto`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6694 CHARSET_INFO my_charset_utf8_esperanto_uca_ci = {
6695 209,
6696 0,
6697 0, /* number */
6698 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6699 "utf8mb3", /* cs name */
6700 "utf8mb3_esperanto_ci", /* m_coll_name */
6701 "UTF-8 Unicode", /* comment */
6702 esperanto, /* tailoring */
6703 nullptr, /* coll_param */
6704 ctype_utf8, /* ctype */
6705 nullptr, /* to_lower */
6706 nullptr, /* to_upper */
6707 nullptr, /* sort_order */
6708 nullptr, /* uca */
6709 nullptr, /* tab_to_uni */
6710 nullptr, /* tab_from_uni */
6711 &my_unicase_default, /* caseinfo */
6712 nullptr, /* state_map */
6713 nullptr, /* ident_map */
6714 8, /* strxfrm_multiply */
6715 1, /* caseup_multiply */
6716 1, /* casedn_multiply */
6717 1, /* mbminlen */
6718 3, /* mbmaxlen */
6719 1, /* mbmaxlenlen */
6720 9, /* min_sort_char */
6721 0xFFFF, /* max_sort_char */
6722 ' ', /* pad char */
6723 false, /* escape_with_backslash_is_dangerous */
6724 1, /* levels_for_compare */
6725 &my_charset_utf8_handler,
6726 &my_collation_any_uca_handler,
6727 PAD_SPACE};
6728
/*
  Descriptor for the "utf8mb3_hungarian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `hungarian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6729 CHARSET_INFO my_charset_utf8_hungarian_uca_ci = {
6730 210,
6731 0,
6732 0, /* number */
6733 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6734 "utf8mb3", /* cs name */
6735 "utf8mb3_hungarian_ci", /* m_coll_name */
6736 "UTF-8 Unicode", /* comment */
6737 hungarian, /* tailoring */
6738 nullptr, /* coll_param */
6739 ctype_utf8, /* ctype */
6740 nullptr, /* to_lower */
6741 nullptr, /* to_upper */
6742 nullptr, /* sort_order */
6743 nullptr, /* uca */
6744 nullptr, /* tab_to_uni */
6745 nullptr, /* tab_from_uni */
6746 &my_unicase_default, /* caseinfo */
6747 nullptr, /* state_map */
6748 nullptr, /* ident_map */
6749 8, /* strxfrm_multiply */
6750 1, /* caseup_multiply */
6751 1, /* casedn_multiply */
6752 1, /* mbminlen */
6753 3, /* mbmaxlen */
6754 1, /* mbmaxlenlen */
6755 9, /* min_sort_char */
6756 0xFFFF, /* max_sort_char */
6757 ' ', /* pad char */
6758 false, /* escape_with_backslash_is_dangerous */
6759 1, /* levels_for_compare */
6760 &my_charset_utf8_handler,
6761 &my_collation_any_uca_handler,
6762 PAD_SPACE};
6763
/*
  Descriptor for the "utf8mb3_sinhala_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `sinhala`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6764 CHARSET_INFO my_charset_utf8_sinhala_uca_ci = {
6765 211,
6766 0,
6767 0, /* number */
6768 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6769 "utf8mb3", /* cs name */
6770 "utf8mb3_sinhala_ci", /* m_coll_name */
6771 "UTF-8 Unicode", /* comment */
6772 sinhala, /* tailoring */
6773 nullptr, /* coll_param */
6774 ctype_utf8, /* ctype */
6775 nullptr, /* to_lower */
6776 nullptr, /* to_upper */
6777 nullptr, /* sort_order */
6778 nullptr, /* uca */
6779 nullptr, /* tab_to_uni */
6780 nullptr, /* tab_from_uni */
6781 &my_unicase_default, /* caseinfo */
6782 nullptr, /* state_map */
6783 nullptr, /* ident_map */
6784 8, /* strxfrm_multiply */
6785 1, /* caseup_multiply */
6786 1, /* casedn_multiply */
6787 1, /* mbminlen */
6788 3, /* mbmaxlen */
6789 1, /* mbmaxlenlen */
6790 9, /* min_sort_char */
6791 0xFFFF, /* max_sort_char */
6792 ' ', /* pad char */
6793 false, /* escape_with_backslash_is_dangerous */
6794 1, /* levels_for_compare */
6795 &my_charset_utf8_handler,
6796 &my_collation_any_uca_handler,
6797 PAD_SPACE};
6798
/*
  Descriptor for the "utf8mb3_german2_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `german2`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6799 CHARSET_INFO my_charset_utf8_german2_uca_ci = {
6800 212,
6801 0,
6802 0, /* number */
6803 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6804 "utf8mb3", /* cs name */
6805 "utf8mb3_german2_ci", /* m_coll_name */
6806 "UTF-8 Unicode", /* comment */
6807 german2, /* tailoring */
6808 nullptr, /* coll_param */
6809 ctype_utf8, /* ctype */
6810 nullptr, /* to_lower */
6811 nullptr, /* to_upper */
6812 nullptr, /* sort_order */
6813 nullptr, /* uca */
6814 nullptr, /* tab_to_uni */
6815 nullptr, /* tab_from_uni */
6816 &my_unicase_default, /* caseinfo */
6817 nullptr, /* state_map */
6818 nullptr, /* ident_map */
6819 8, /* strxfrm_multiply */
6820 1, /* caseup_multiply */
6821 1, /* casedn_multiply */
6822 1, /* mbminlen */
6823 3, /* mbmaxlen */
6824 1, /* mbmaxlenlen */
6825 9, /* min_sort_char */
6826 0xFFFF, /* max_sort_char */
6827 ' ', /* pad char */
6828 false, /* escape_with_backslash_is_dangerous */
6829 1, /* levels_for_compare */
6830 &my_charset_utf8_handler,
6831 &my_collation_any_uca_handler,
6832 PAD_SPACE};
6833
/*
  Descriptor for the "utf8mb3_croatian_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `croatian`; operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6834 CHARSET_INFO my_charset_utf8_croatian_uca_ci = {
6835 213,
6836 0,
6837 0, /* number */
6838 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6839 "utf8mb3", /* cs name */
6840 "utf8mb3_croatian_ci", /* m_coll_name */
6841 "UTF-8 Unicode", /* comment */
6842 croatian, /* tailoring */
6843 nullptr, /* coll_param */
6844 ctype_utf8, /* ctype */
6845 nullptr, /* to_lower */
6846 nullptr, /* to_upper */
6847 nullptr, /* sort_order */
6848 nullptr, /* uca */
6849 nullptr, /* tab_to_uni */
6850 nullptr, /* tab_from_uni */
6851 &my_unicase_default, /* caseinfo */
6852 nullptr, /* state_map */
6853 nullptr, /* ident_map */
6854 8, /* strxfrm_multiply */
6855 1, /* caseup_multiply */
6856 1, /* casedn_multiply */
6857 1, /* mbminlen */
6858 3, /* mbmaxlen */
6859 1, /* mbmaxlenlen */
6860 9, /* min_sort_char */
6861 0xFFFF, /* max_sort_char */
6862 ' ', /* pad char */
6863 false, /* escape_with_backslash_is_dangerous */
6864 1, /* levels_for_compare */
6865 &my_charset_utf8_handler,
6866 &my_collation_any_uca_handler,
6867 PAD_SPACE};
6868
/*
  Descriptor for the "utf8mb3_unicode_520_ci" collation of the "utf8mb3"
  character set. The tailoring string is empty. Unlike the other utf8mb3
  descriptors here, the uca slot points at my_uca_v520 (instead of nullptr) and
  caseinfo is my_unicase_unicode520. Operations are delegated to
  my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6869 CHARSET_INFO my_charset_utf8_unicode_520_ci = {
6870 214,
6871 0,
6872 0, /* number */
6873 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6874 "utf8mb3", /* csname */
6875 "utf8mb3_unicode_520_ci", /* m_coll_name */
6876 "UTF-8 Unicode", /* comment */
6877 "", /* tailoring */
6878 nullptr, /* coll_param */
6879 ctype_utf8, /* ctype */
6880 nullptr, /* to_lower */
6881 nullptr, /* to_upper */
6882 nullptr, /* sort_order */
6883 &my_uca_v520, /* uca */
6884 nullptr, /* tab_to_uni */
6885 nullptr, /* tab_from_uni */
6886 &my_unicase_unicode520, /* caseinfo */
6887 nullptr, /* state_map */
6888 nullptr, /* ident_map */
6889 8, /* strxfrm_multiply */
6890 1, /* caseup_multiply */
6891 1, /* casedn_multiply */
6892 1, /* mbminlen */
6893 3, /* mbmaxlen */
6894 1, /* mbmaxlenlen */
6895 9, /* min_sort_char */
6896 0xFFFF, /* max_sort_char */
6897 ' ', /* pad char */
6898 false, /* escape_with_backslash_is_dangerous */
6899 1, /* levels_for_compare */
6900 &my_charset_utf8_handler,
6901 &my_collation_any_uca_handler,
6902 PAD_SPACE};
6903
/*
  Descriptor for the "utf8mb3_vietnamese_ci" collation of the "utf8mb3"
  character set. Tailoring comes from `vietnamese`; operations are delegated
  to my_charset_utf8_handler and my_collation_any_uca_handler.
*/
6904 CHARSET_INFO my_charset_utf8_vietnamese_ci = {
6905 215,
6906 0,
6907 0, /* number */
6908 MY_CS_UTF8MB3_UCA_FLAGS, /* flags */
6909 "utf8mb3", /* cs name */
6910 "utf8mb3_vietnamese_ci", /* m_coll_name */
6911 "UTF-8 Unicode", /* comment */
6912 vietnamese, /* tailoring */
6913 nullptr, /* coll_param */
6914 ctype_utf8, /* ctype */
6915 nullptr, /* to_lower */
6916 nullptr, /* to_upper */
6917 nullptr, /* sort_order */
6918 nullptr, /* uca */
6919 nullptr, /* tab_to_uni */
6920 nullptr, /* tab_from_uni */
6921 &my_unicase_default, /* caseinfo */
6922 nullptr, /* state_map */
6923 nullptr, /* ident_map */
6924 8, /* strxfrm_multiply */
6925 1, /* caseup_multiply */
6926 1, /* casedn_multiply */
6927 1, /* mbminlen */
6928 3, /* mbmaxlen */
6929 1, /* mbmaxlenlen */
6930 9, /* min_sort_char */
6931 0xFFFF, /* max_sort_char */
6932 ' ', /* pad char */
6933 false, /* escape_with_backslash_is_dangerous */
6934 1, /* levels_for_compare */
6935 &my_charset_utf8_handler,
6936 &my_collation_any_uca_handler,
6937 PAD_SPACE};
6938
/* Character-set handler used by the utf8mb4 descriptors below; only declared here. */
6939 extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler;
6940
/*
  Flag combination shared by the utf8mb4 UCA collation descriptors below.
  Same as the utf8mb3 set plus MY_CS_UNICODE_SUPPLEMENT.
*/
6941 #define MY_CS_UTF8MB4_UCA_FLAGS \
6942 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT)
6943
/*
  Descriptor for the MY_UTF8MB4 "_unicode_ci" collation. The tailoring string
  is empty; characters are up to 4 bytes long (mbmaxlen). Operations are
  delegated to my_charset_utf8mb4_handler and my_collation_any_uca_handler.
*/
6944 CHARSET_INFO my_charset_utf8mb4_unicode_ci = {
6945 224,
6946 0,
6947 0, /* number */
6948 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
6949 MY_UTF8MB4, /* csname */
6950 MY_UTF8MB4 "_unicode_ci", /* m_coll_name */
6951 "UTF-8 Unicode", /* comment */
6952 "", /* tailoring */
6953 nullptr, /* coll_param */
6954 ctype_utf8, /* ctype */
6955 nullptr, /* to_lower */
6956 nullptr, /* to_upper */
6957 nullptr, /* sort_order */
6958 nullptr, /* uca */
6959 nullptr, /* tab_to_uni */
6960 nullptr, /* tab_from_uni */
6961 &my_unicase_default, /* caseinfo */
6962 nullptr, /* state_map */
6963 nullptr, /* ident_map */
6964 8, /* strxfrm_multiply */
6965 1, /* caseup_multiply */
6966 1, /* casedn_multiply */
6967 1, /* mbminlen */
6968 4, /* mbmaxlen */
6969 1, /* mbmaxlenlen */
6970 9, /* min_sort_char */
6971 0xFFFF, /* max_sort_char */
6972 ' ', /* pad char */
6973 false, /* escape_with_backslash_is_dangerous */
6974 1, /* levels_for_compare */
6975 &my_charset_utf8mb4_handler,
6976 &my_collation_any_uca_handler,
6977 PAD_SPACE};
6978
/*
  Descriptor for the MY_UTF8MB4 "_icelandic_ci" collation. Tailoring comes
  from `icelandic`; characters are up to 4 bytes long (mbmaxlen). Operations
  are delegated to my_charset_utf8mb4_handler and my_collation_any_uca_handler.
*/
6979 CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci = {
6980 225,
6981 0,
6982 0, /* number */
6983 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
6984 MY_UTF8MB4, /* csname */
6985 MY_UTF8MB4 "_icelandic_ci", /* m_coll_name */
6986 "UTF-8 Unicode", /* comment */
6987 icelandic, /* tailoring */
6988 nullptr, /* coll_param */
6989 ctype_utf8, /* ctype */
6990 nullptr, /* to_lower */
6991 nullptr, /* to_upper */
6992 nullptr, /* sort_order */
6993 nullptr, /* uca */
6994 nullptr, /* tab_to_uni */
6995 nullptr, /* tab_from_uni */
6996 &my_unicase_default, /* caseinfo */
6997 nullptr, /* state_map */
6998 nullptr, /* ident_map */
6999 8, /* strxfrm_multiply */
7000 1, /* caseup_multiply */
7001 1, /* casedn_multiply */
7002 1, /* mbminlen */
7003 4, /* mbmaxlen */
7004 1, /* mbmaxlenlen */
7005 9, /* min_sort_char */
7006 0xFFFF, /* max_sort_char */
7007 ' ', /* pad char */
7008 false, /* escape_with_backslash_is_dangerous */
7009 1, /* levels_for_compare */
7010 &my_charset_utf8mb4_handler,
7011 &my_collation_any_uca_handler,
7012 PAD_SPACE};
7013
/*
  Descriptor for the MY_UTF8MB4 "_latvian_ci" collation. Tailoring comes
  from `latvian`; characters are up to 4 bytes long (mbmaxlen). Operations
  are delegated to my_charset_utf8mb4_handler and my_collation_any_uca_handler.
*/
7014 CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci = {
7015 226,
7016 0,
7017 0, /* number */
7018 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7019 MY_UTF8MB4, /* csname */
7020 MY_UTF8MB4 "_latvian_ci", /* m_coll_name */
7021 "UTF-8 Unicode", /* comment */
7022 latvian, /* tailoring */
7023 nullptr, /* coll_param */
7024 ctype_utf8, /* ctype */
7025 nullptr, /* to_lower */
7026 nullptr, /* to_upper */
7027 nullptr, /* sort_order */
7028 nullptr, /* uca */
7029 nullptr, /* tab_to_uni */
7030 nullptr, /* tab_from_uni */
7031 &my_unicase_default, /* caseinfo */
7032 nullptr, /* state_map */
7033 nullptr, /* ident_map */
7034 8, /* strxfrm_multiply */
7035 1, /* caseup_multiply */
7036 1, /* casedn_multiply */
7037 1, /* mbminlen */
7038 4, /* mbmaxlen */
7039 1, /* mbmaxlenlen */
7040 9, /* min_sort_char */
7041 0xFFFF, /* max_sort_char */
7042 ' ', /* pad char */
7043 false, /* escape_with_backslash_is_dangerous */
7044 1, /* levels_for_compare */
7045 &my_charset_utf8mb4_handler,
7046 &my_collation_any_uca_handler,
7047 PAD_SPACE};
7048
/*
  Descriptor for the MY_UTF8MB4 "_romanian_ci" collation. Tailoring comes
  from `romanian`; characters are up to 4 bytes long (mbmaxlen). Operations
  are delegated to my_charset_utf8mb4_handler and my_collation_any_uca_handler.
*/
7049 CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci = {
7050 227,
7051 0,
7052 0, /* number */
7053 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7054 MY_UTF8MB4, /* csname */
7055 MY_UTF8MB4 "_romanian_ci", /* m_coll_name */
7056 "UTF-8 Unicode", /* comment */
7057 romanian, /* tailoring */
7058 nullptr, /* coll_param */
7059 ctype_utf8, /* ctype */
7060 nullptr, /* to_lower */
7061 nullptr, /* to_upper */
7062 nullptr, /* sort_order */
7063 nullptr, /* uca */
7064 nullptr, /* tab_to_uni */
7065 nullptr, /* tab_from_uni */
7066 &my_unicase_default, /* caseinfo */
7067 nullptr, /* state_map */
7068 nullptr, /* ident_map */
7069 8, /* strxfrm_multiply */
7070 1, /* caseup_multiply */
7071 1, /* casedn_multiply */
7072 1, /* mbminlen */
7073 4, /* mbmaxlen */
7074 1, /* mbmaxlenlen */
7075 9, /* min_sort_char */
7076 0xFFFF, /* max_sort_char */
7077 ' ', /* pad char */
7078 false, /* escape_with_backslash_is_dangerous */
7079 1, /* levels_for_compare */
7080 &my_charset_utf8mb4_handler,
7081 &my_collation_any_uca_handler,
7082 PAD_SPACE};
7083
/*
  Descriptor for the MY_UTF8MB4 "_slovenian_ci" collation. Tailoring comes
  from `slovenian`; characters are up to 4 bytes long (mbmaxlen). Operations
  are delegated to my_charset_utf8mb4_handler and my_collation_any_uca_handler.
*/
7084 CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci = {
7085 228,
7086 0,
7087 0, /* number */
7088 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7089 MY_UTF8MB4, /* csname */
7090 MY_UTF8MB4 "_slovenian_ci", /* m_coll_name */
7091 "UTF-8 Unicode", /* comment */
7092 slovenian, /* tailoring */
7093 nullptr, /* coll_param */
7094 ctype_utf8, /* ctype */
7095 nullptr, /* to_lower */
7096 nullptr, /* to_upper */
7097 nullptr, /* sort_order */
7098 nullptr, /* uca */
7099 nullptr, /* tab_to_uni */
7100 nullptr, /* tab_from_uni */
7101 &my_unicase_default, /* caseinfo */
7102 nullptr, /* state_map */
7103 nullptr, /* ident_map */
7104 8, /* strxfrm_multiply */
7105 1, /* caseup_multiply */
7106 1, /* casedn_multiply */
7107 1, /* mbminlen */
7108 4, /* mbmaxlen */
7109 1, /* mbmaxlenlen */
7110 9, /* min_sort_char */
7111 0xFFFF, /* max_sort_char */
7112 ' ', /* pad char */
7113 false, /* escape_with_backslash_is_dangerous */
7114 1, /* levels_for_compare */
7115 &my_charset_utf8mb4_handler,
7116 &my_collation_any_uca_handler,
7117 PAD_SPACE};
7118
/*
  Descriptor for the MY_UTF8MB4 "_polish_ci" collation. Tailoring comes
  from `polish`; characters are up to 4 bytes long (mbmaxlen). Operations
  are delegated to my_charset_utf8mb4_handler and my_collation_any_uca_handler.
*/
7119 CHARSET_INFO my_charset_utf8mb4_polish_uca_ci = {
7120 229,
7121 0,
7122 0, /* number */
7123 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7124 MY_UTF8MB4, /* csname */
7125 MY_UTF8MB4 "_polish_ci", /* m_coll_name */
7126 "UTF-8 Unicode", /* comment */
7127 polish, /* tailoring */
7128 nullptr, /* coll_param */
7129 ctype_utf8, /* ctype */
7130 nullptr, /* to_lower */
7131 nullptr, /* to_upper */
7132 nullptr, /* sort_order */
7133 nullptr, /* uca */
7134 nullptr, /* tab_to_uni */
7135 nullptr, /* tab_from_uni */
7136 &my_unicase_default, /* caseinfo */
7137 nullptr, /* state_map */
7138 nullptr, /* ident_map */
7139 8, /* strxfrm_multiply */
7140 1, /* caseup_multiply */
7141 1, /* casedn_multiply */
7142 1, /* mbminlen */
7143 4, /* mbmaxlen */
7144 1, /* mbmaxlenlen */
7145 9, /* min_sort_char */
7146 0xFFFF, /* max_sort_char */
7147 ' ', /* pad char */
7148 false, /* escape_with_backslash_is_dangerous */
7149 1, /* levels_for_compare */
7150 &my_charset_utf8mb4_handler,
7151 &my_collation_any_uca_handler,
7152 PAD_SPACE};
7153
/*
  utf8mb4_estonian_ci collation descriptor. Pairs the `estonian` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7154 CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci = {
7155 230, /* presumably the collation id -- confirm against CHARSET_INFO */
7156 0,
7157 0, /* number */
7158 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7159 MY_UTF8MB4, /* csname */
7160 MY_UTF8MB4 "_estonian_ci", /* m_coll_name */
7161 "UTF-8 Unicode", /* comment */
7162 estonian, /* tailoring */
7163 nullptr, /* coll_param */
7164 ctype_utf8, /* ctype */
7165 nullptr, /* to_lower */
7166 nullptr, /* to_upper */
7167 nullptr, /* sort_order */
7168 nullptr, /* uca */
7169 nullptr, /* tab_to_uni */
7170 nullptr, /* tab_from_uni */
7171 &my_unicase_default, /* caseinfo */
7172 nullptr, /* state_map */
7173 nullptr, /* ident_map */
7174 8, /* strxfrm_multiply */
7175 1, /* caseup_multiply */
7176 1, /* casedn_multiply */
7177 1, /* mbminlen */
7178 4, /* mbmaxlen */
7179 1, /* mbmaxlenlen */
7180 9, /* min_sort_char */
7181 0xFFFF, /* max_sort_char */
7182 ' ', /* pad char */
7183 false, /* escape_with_backslash_is_dangerous */
7184 1, /* levels_for_compare */
7185 &my_charset_utf8mb4_handler, /* character set handler */
7186 &my_collation_any_uca_handler, /* collation handler */
7187 PAD_SPACE}; /* pad attribute */
7188
/*
  utf8mb4_spanish_ci collation descriptor. Pairs the `spanish` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7189 CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci = {
7190 231, /* presumably the collation id -- confirm against CHARSET_INFO */
7191 0,
7192 0, /* number */
7193 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7194 MY_UTF8MB4, /* csname */
7195 MY_UTF8MB4 "_spanish_ci", /* m_coll_name */
7196 "UTF-8 Unicode", /* comment */
7197 spanish, /* tailoring */
7198 nullptr, /* coll_param */
7199 ctype_utf8, /* ctype */
7200 nullptr, /* to_lower */
7201 nullptr, /* to_upper */
7202 nullptr, /* sort_order */
7203 nullptr, /* uca */
7204 nullptr, /* tab_to_uni */
7205 nullptr, /* tab_from_uni */
7206 &my_unicase_default, /* caseinfo */
7207 nullptr, /* state_map */
7208 nullptr, /* ident_map */
7209 8, /* strxfrm_multiply */
7210 1, /* caseup_multiply */
7211 1, /* casedn_multiply */
7212 1, /* mbminlen */
7213 4, /* mbmaxlen */
7214 1, /* mbmaxlenlen */
7215 9, /* min_sort_char */
7216 0xFFFF, /* max_sort_char */
7217 ' ', /* pad char */
7218 false, /* escape_with_backslash_is_dangerous */
7219 1, /* levels_for_compare */
7220 &my_charset_utf8mb4_handler, /* character set handler */
7221 &my_collation_any_uca_handler, /* collation handler */
7222 PAD_SPACE}; /* pad attribute */
7223
/*
  utf8mb4_swedish_ci collation descriptor. Pairs the `swedish` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7224 CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci = {
7225 232, /* presumably the collation id -- confirm against CHARSET_INFO */
7226 0,
7227 0, /* number */
7228 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7229 MY_UTF8MB4, /* csname */
7230 MY_UTF8MB4 "_swedish_ci", /* m_coll_name */
7231 "UTF-8 Unicode", /* comment */
7232 swedish, /* tailoring */
7233 nullptr, /* coll_param */
7234 ctype_utf8, /* ctype */
7235 nullptr, /* to_lower */
7236 nullptr, /* to_upper */
7237 nullptr, /* sort_order */
7238 nullptr, /* uca */
7239 nullptr, /* tab_to_uni */
7240 nullptr, /* tab_from_uni */
7241 &my_unicase_default, /* caseinfo */
7242 nullptr, /* state_map */
7243 nullptr, /* ident_map */
7244 8, /* strxfrm_multiply */
7245 1, /* caseup_multiply */
7246 1, /* casedn_multiply */
7247 1, /* mbminlen */
7248 4, /* mbmaxlen */
7249 1, /* mbmaxlenlen */
7250 9, /* min_sort_char */
7251 0xFFFF, /* max_sort_char */
7252 ' ', /* pad char */
7253 false, /* escape_with_backslash_is_dangerous */
7254 1, /* levels_for_compare */
7255 &my_charset_utf8mb4_handler, /* character set handler */
7256 &my_collation_any_uca_handler, /* collation handler */
7257 PAD_SPACE}; /* pad attribute */
7258
/*
  utf8mb4_turkish_ci collation descriptor. Pairs the `turkish` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Differs from the neighbouring utf8mb4
  descriptors in two ways: caseinfo is my_unicase_turkish, and the
  caseup/casedn multipliers are 2 instead of 1. Initializers are positional,
  so their order has to follow the CHARSET_INFO declaration (not visible
  here).
*/
7259 CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci = {
7260 233, /* presumably the collation id -- confirm against CHARSET_INFO */
7261 0,
7262 0, /* number */
7263 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7264 MY_UTF8MB4, /* csname */
7265 MY_UTF8MB4 "_turkish_ci", /* m_coll_name */
7266 "UTF-8 Unicode", /* comment */
7267 turkish, /* tailoring */
7268 nullptr, /* coll_param */
7269 ctype_utf8, /* ctype */
7270 nullptr, /* to_lower */
7271 nullptr, /* to_upper */
7272 nullptr, /* sort_order */
7273 nullptr, /* uca */
7274 nullptr, /* tab_to_uni */
7275 nullptr, /* tab_from_uni */
7276 &my_unicase_turkish, /* caseinfo */
7277 nullptr, /* state_map */
7278 nullptr, /* ident_map */
7279 8, /* strxfrm_multiply */
7280 2, /* caseup_multiply */
7281 2, /* casedn_multiply */
7282 1, /* mbminlen */
7283 4, /* mbmaxlen */
7284 1, /* mbmaxlenlen */
7285 9, /* min_sort_char */
7286 0xFFFF, /* max_sort_char */
7287 ' ', /* pad char */
7288 false, /* escape_with_backslash_is_dangerous */
7289 1, /* levels_for_compare */
7290 &my_charset_utf8mb4_handler, /* character set handler */
7291 &my_collation_any_uca_handler, /* collation handler */
7292 PAD_SPACE}; /* pad attribute */
7293
/*
  utf8mb4_czech_ci collation descriptor. Pairs the `czech` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7294 CHARSET_INFO my_charset_utf8mb4_czech_uca_ci = {
7295 234, /* presumably the collation id -- confirm against CHARSET_INFO */
7296 0,
7297 0, /* number */
7298 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7299 MY_UTF8MB4, /* csname */
7300 MY_UTF8MB4 "_czech_ci", /* m_coll_name */
7301 "UTF-8 Unicode", /* comment */
7302 czech, /* tailoring */
7303 nullptr, /* coll_param */
7304 ctype_utf8, /* ctype */
7305 nullptr, /* to_lower */
7306 nullptr, /* to_upper */
7307 nullptr, /* sort_order */
7308 nullptr, /* uca */
7309 nullptr, /* tab_to_uni */
7310 nullptr, /* tab_from_uni */
7311 &my_unicase_default, /* caseinfo */
7312 nullptr, /* state_map */
7313 nullptr, /* ident_map */
7314 8, /* strxfrm_multiply */
7315 1, /* caseup_multiply */
7316 1, /* casedn_multiply */
7317 1, /* mbminlen */
7318 4, /* mbmaxlen */
7319 1, /* mbmaxlenlen */
7320 9, /* min_sort_char */
7321 0xFFFF, /* max_sort_char */
7322 ' ', /* pad char */
7323 false, /* escape_with_backslash_is_dangerous */
7324 1, /* levels_for_compare */
7325 &my_charset_utf8mb4_handler, /* character set handler */
7326 &my_collation_any_uca_handler, /* collation handler */
7327 PAD_SPACE}; /* pad attribute */
7328
/*
  utf8mb4_danish_ci collation descriptor. Pairs the `danish` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7329 CHARSET_INFO my_charset_utf8mb4_danish_uca_ci = {
7330 235, /* presumably the collation id -- confirm against CHARSET_INFO */
7331 0,
7332 0, /* number */
7333 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7334 MY_UTF8MB4, /* csname */
7335 MY_UTF8MB4 "_danish_ci", /* m_coll_name */
7336 "UTF-8 Unicode", /* comment */
7337 danish, /* tailoring */
7338 nullptr, /* coll_param */
7339 ctype_utf8, /* ctype */
7340 nullptr, /* to_lower */
7341 nullptr, /* to_upper */
7342 nullptr, /* sort_order */
7343 nullptr, /* uca */
7344 nullptr, /* tab_to_uni */
7345 nullptr, /* tab_from_uni */
7346 &my_unicase_default, /* caseinfo */
7347 nullptr, /* state_map */
7348 nullptr, /* ident_map */
7349 8, /* strxfrm_multiply */
7350 1, /* caseup_multiply */
7351 1, /* casedn_multiply */
7352 1, /* mbminlen */
7353 4, /* mbmaxlen */
7354 1, /* mbmaxlenlen */
7355 9, /* min_sort_char */
7356 0xFFFF, /* max_sort_char */
7357 ' ', /* pad char */
7358 false, /* escape_with_backslash_is_dangerous */
7359 1, /* levels_for_compare */
7360 &my_charset_utf8mb4_handler, /* character set handler */
7361 &my_collation_any_uca_handler, /* collation handler */
7362 PAD_SPACE}; /* pad attribute */
7363
/*
  utf8mb4_lithuanian_ci collation descriptor. Pairs the `lithuanian`
  tailoring with my_charset_utf8mb4_handler / my_collation_any_uca_handler;
  the uca pointer is null in this initializer. Initializers are positional,
  so their order has to follow the CHARSET_INFO declaration (not visible
  here).
*/
7364 CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci = {
7365 236, /* presumably the collation id -- confirm against CHARSET_INFO */
7366 0,
7367 0, /* number */
7368 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7369 MY_UTF8MB4, /* csname */
7370 MY_UTF8MB4 "_lithuanian_ci", /* m_coll_name */
7371 "UTF-8 Unicode", /* comment */
7372 lithuanian, /* tailoring */
7373 nullptr, /* coll_param */
7374 ctype_utf8, /* ctype */
7375 nullptr, /* to_lower */
7376 nullptr, /* to_upper */
7377 nullptr, /* sort_order */
7378 nullptr, /* uca */
7379 nullptr, /* tab_to_uni */
7380 nullptr, /* tab_from_uni */
7381 &my_unicase_default, /* caseinfo */
7382 nullptr, /* state_map */
7383 nullptr, /* ident_map */
7384 8, /* strxfrm_multiply */
7385 1, /* caseup_multiply */
7386 1, /* casedn_multiply */
7387 1, /* mbminlen */
7388 4, /* mbmaxlen */
7389 1, /* mbmaxlenlen */
7390 9, /* min_sort_char */
7391 0xFFFF, /* max_sort_char */
7392 ' ', /* pad char */
7393 false, /* escape_with_backslash_is_dangerous */
7394 1, /* levels_for_compare */
7395 &my_charset_utf8mb4_handler, /* character set handler */
7396 &my_collation_any_uca_handler, /* collation handler */
7397 PAD_SPACE}; /* pad attribute */
7398
/*
  utf8mb4_slovak_ci collation descriptor. Pairs the `slovak` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7399 CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci = {
7400 237, /* presumably the collation id -- confirm against CHARSET_INFO */
7401 0,
7402 0, /* number */
7403 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7404 MY_UTF8MB4, /* csname */
7405 MY_UTF8MB4 "_slovak_ci", /* m_coll_name */
7406 "UTF-8 Unicode", /* comment */
7407 slovak, /* tailoring */
7408 nullptr, /* coll_param */
7409 ctype_utf8, /* ctype */
7410 nullptr, /* to_lower */
7411 nullptr, /* to_upper */
7412 nullptr, /* sort_order */
7413 nullptr, /* uca */
7414 nullptr, /* tab_to_uni */
7415 nullptr, /* tab_from_uni */
7416 &my_unicase_default, /* caseinfo */
7417 nullptr, /* state_map */
7418 nullptr, /* ident_map */
7419 8, /* strxfrm_multiply */
7420 1, /* caseup_multiply */
7421 1, /* casedn_multiply */
7422 1, /* mbminlen */
7423 4, /* mbmaxlen */
7424 1, /* mbmaxlenlen */
7425 9, /* min_sort_char */
7426 0xFFFF, /* max_sort_char */
7427 ' ', /* pad char */
7428 false, /* escape_with_backslash_is_dangerous */
7429 1, /* levels_for_compare */
7430 &my_charset_utf8mb4_handler, /* character set handler */
7431 &my_collation_any_uca_handler, /* collation handler */
7432 PAD_SPACE}; /* pad attribute */
7433
/*
  utf8mb4_spanish2_ci collation descriptor. Pairs the `spanish2` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7434 CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci = {
7435 238, /* presumably the collation id -- confirm against CHARSET_INFO */
7436 0,
7437 0, /* number */
7438 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7439 MY_UTF8MB4, /* csname */
7440 MY_UTF8MB4 "_spanish2_ci", /* m_coll_name */
7441 "UTF-8 Unicode", /* comment */
7442 spanish2, /* tailoring */
7443 nullptr, /* coll_param */
7444 ctype_utf8, /* ctype */
7445 nullptr, /* to_lower */
7446 nullptr, /* to_upper */
7447 nullptr, /* sort_order */
7448 nullptr, /* uca */
7449 nullptr, /* tab_to_uni */
7450 nullptr, /* tab_from_uni */
7451 &my_unicase_default, /* caseinfo */
7452 nullptr, /* state_map */
7453 nullptr, /* ident_map */
7454 8, /* strxfrm_multiply */
7455 1, /* caseup_multiply */
7456 1, /* casedn_multiply */
7457 1, /* mbminlen */
7458 4, /* mbmaxlen */
7459 1, /* mbmaxlenlen */
7460 9, /* min_sort_char */
7461 0xFFFF, /* max_sort_char */
7462 ' ', /* pad char */
7463 false, /* escape_with_backslash_is_dangerous */
7464 1, /* levels_for_compare */
7465 &my_charset_utf8mb4_handler, /* character set handler */
7466 &my_collation_any_uca_handler, /* collation handler */
7467 PAD_SPACE}; /* pad attribute */
7468
/*
  utf8mb4_roman_ci collation descriptor. Pairs the `roman` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7469 CHARSET_INFO my_charset_utf8mb4_roman_uca_ci = {
7470 239, /* presumably the collation id -- confirm against CHARSET_INFO */
7471 0,
7472 0, /* number */
7473 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7474 MY_UTF8MB4, /* csname */
7475 MY_UTF8MB4 "_roman_ci", /* m_coll_name */
7476 "UTF-8 Unicode", /* comment */
7477 roman, /* tailoring */
7478 nullptr, /* coll_param */
7479 ctype_utf8, /* ctype */
7480 nullptr, /* to_lower */
7481 nullptr, /* to_upper */
7482 nullptr, /* sort_order */
7483 nullptr, /* uca */
7484 nullptr, /* tab_to_uni */
7485 nullptr, /* tab_from_uni */
7486 &my_unicase_default, /* caseinfo */
7487 nullptr, /* state_map */
7488 nullptr, /* ident_map */
7489 8, /* strxfrm_multiply */
7490 1, /* caseup_multiply */
7491 1, /* casedn_multiply */
7492 1, /* mbminlen */
7493 4, /* mbmaxlen */
7494 1, /* mbmaxlenlen */
7495 9, /* min_sort_char */
7496 0xFFFF, /* max_sort_char */
7497 ' ', /* pad char */
7498 false, /* escape_with_backslash_is_dangerous */
7499 1, /* levels_for_compare */
7500 &my_charset_utf8mb4_handler, /* character set handler */
7501 &my_collation_any_uca_handler, /* collation handler */
7502 PAD_SPACE}; /* pad attribute */
7503
/*
  utf8mb4_persian_ci collation descriptor. Pairs the `persian` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7504 CHARSET_INFO my_charset_utf8mb4_persian_uca_ci = {
7505 240, /* presumably the collation id -- confirm against CHARSET_INFO */
7506 0,
7507 0, /* number */
7508 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7509 MY_UTF8MB4, /* csname */
7510 MY_UTF8MB4 "_persian_ci", /* m_coll_name */
7511 "UTF-8 Unicode", /* comment */
7512 persian, /* tailoring */
7513 nullptr, /* coll_param */
7514 ctype_utf8, /* ctype */
7515 nullptr, /* to_lower */
7516 nullptr, /* to_upper */
7517 nullptr, /* sort_order */
7518 nullptr, /* uca */
7519 nullptr, /* tab_to_uni */
7520 nullptr, /* tab_from_uni */
7521 &my_unicase_default, /* caseinfo */
7522 nullptr, /* state_map */
7523 nullptr, /* ident_map */
7524 8, /* strxfrm_multiply */
7525 1, /* caseup_multiply */
7526 1, /* casedn_multiply */
7527 1, /* mbminlen */
7528 4, /* mbmaxlen */
7529 1, /* mbmaxlenlen */
7530 9, /* min_sort_char */
7531 0xFFFF, /* max_sort_char */
7532 ' ', /* pad char */
7533 false, /* escape_with_backslash_is_dangerous */
7534 1, /* levels_for_compare */
7535 &my_charset_utf8mb4_handler, /* character set handler */
7536 &my_collation_any_uca_handler, /* collation handler */
7537 PAD_SPACE}; /* pad attribute */
7538
/*
  utf8mb4_esperanto_ci collation descriptor. Pairs the `esperanto` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7539 CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci = {
7540 241, /* presumably the collation id -- confirm against CHARSET_INFO */
7541 0,
7542 0, /* number */
7543 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7544 MY_UTF8MB4, /* csname */
7545 MY_UTF8MB4 "_esperanto_ci", /* m_coll_name */
7546 "UTF-8 Unicode", /* comment */
7547 esperanto, /* tailoring */
7548 nullptr, /* coll_param */
7549 ctype_utf8, /* ctype */
7550 nullptr, /* to_lower */
7551 nullptr, /* to_upper */
7552 nullptr, /* sort_order */
7553 nullptr, /* uca */
7554 nullptr, /* tab_to_uni */
7555 nullptr, /* tab_from_uni */
7556 &my_unicase_default, /* caseinfo */
7557 nullptr, /* state_map */
7558 nullptr, /* ident_map */
7559 8, /* strxfrm_multiply */
7560 1, /* caseup_multiply */
7561 1, /* casedn_multiply */
7562 1, /* mbminlen */
7563 4, /* mbmaxlen */
7564 1, /* mbmaxlenlen */
7565 9, /* min_sort_char */
7566 0xFFFF, /* max_sort_char */
7567 ' ', /* pad char */
7568 false, /* escape_with_backslash_is_dangerous */
7569 1, /* levels_for_compare */
7570 &my_charset_utf8mb4_handler, /* character set handler */
7571 &my_collation_any_uca_handler, /* collation handler */
7572 PAD_SPACE}; /* pad attribute */
7573
/*
  utf8mb4_hungarian_ci collation descriptor. Pairs the `hungarian` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7574 CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci = {
7575 242, /* presumably the collation id -- confirm against CHARSET_INFO */
7576 0,
7577 0, /* number */
7578 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7579 MY_UTF8MB4, /* csname */
7580 MY_UTF8MB4 "_hungarian_ci", /* m_coll_name */
7581 "UTF-8 Unicode", /* comment */
7582 hungarian, /* tailoring */
7583 nullptr, /* coll_param */
7584 ctype_utf8, /* ctype */
7585 nullptr, /* to_lower */
7586 nullptr, /* to_upper */
7587 nullptr, /* sort_order */
7588 nullptr, /* uca */
7589 nullptr, /* tab_to_uni */
7590 nullptr, /* tab_from_uni */
7591 &my_unicase_default, /* caseinfo */
7592 nullptr, /* state_map */
7593 nullptr, /* ident_map */
7594 8, /* strxfrm_multiply */
7595 1, /* caseup_multiply */
7596 1, /* casedn_multiply */
7597 1, /* mbminlen */
7598 4, /* mbmaxlen */
7599 1, /* mbmaxlenlen */
7600 9, /* min_sort_char */
7601 0xFFFF, /* max_sort_char */
7602 ' ', /* pad char */
7603 false, /* escape_with_backslash_is_dangerous */
7604 1, /* levels_for_compare */
7605 &my_charset_utf8mb4_handler, /* character set handler */
7606 &my_collation_any_uca_handler, /* collation handler */
7607 PAD_SPACE}; /* pad attribute */
7608
/*
  utf8mb4_sinhala_ci collation descriptor. Pairs the `sinhala` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7609 CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci = {
7610 243, /* presumably the collation id -- confirm against CHARSET_INFO */
7611 0,
7612 0, /* number */
7613 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7614 MY_UTF8MB4, /* csname */
7615 MY_UTF8MB4 "_sinhala_ci", /* m_coll_name */
7616 "UTF-8 Unicode", /* comment */
7617 sinhala, /* tailoring */
7618 nullptr, /* coll_param */
7619 ctype_utf8, /* ctype */
7620 nullptr, /* to_lower */
7621 nullptr, /* to_upper */
7622 nullptr, /* sort_order */
7623 nullptr, /* uca */
7624 nullptr, /* tab_to_uni */
7625 nullptr, /* tab_from_uni */
7626 &my_unicase_default, /* caseinfo */
7627 nullptr, /* state_map */
7628 nullptr, /* ident_map */
7629 8, /* strxfrm_multiply */
7630 1, /* caseup_multiply */
7631 1, /* casedn_multiply */
7632 1, /* mbminlen */
7633 4, /* mbmaxlen */
7634 1, /* mbmaxlenlen */
7635 9, /* min_sort_char */
7636 0xFFFF, /* max_sort_char */
7637 ' ', /* pad char */
7638 false, /* escape_with_backslash_is_dangerous */
7639 1, /* levels_for_compare */
7640 &my_charset_utf8mb4_handler, /* character set handler */
7641 &my_collation_any_uca_handler, /* collation handler */
7642 PAD_SPACE}; /* pad attribute */
7643
/*
  utf8mb4_german2_ci collation descriptor. Pairs the `german2` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7644 CHARSET_INFO my_charset_utf8mb4_german2_uca_ci = {
7645 244, /* presumably the collation id -- confirm against CHARSET_INFO */
7646 0,
7647 0, /* number */
7648 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7649 MY_UTF8MB4, /* csname */
7650 MY_UTF8MB4 "_german2_ci", /* m_coll_name */
7651 "UTF-8 Unicode", /* comment */
7652 german2, /* tailoring */
7653 nullptr, /* coll_param */
7654 ctype_utf8, /* ctype */
7655 nullptr, /* to_lower */
7656 nullptr, /* to_upper */
7657 nullptr, /* sort_order */
7658 nullptr, /* uca */
7659 nullptr, /* tab_to_uni */
7660 nullptr, /* tab_from_uni */
7661 &my_unicase_default, /* caseinfo */
7662 nullptr, /* state_map */
7663 nullptr, /* ident_map */
7664 8, /* strxfrm_multiply */
7665 1, /* caseup_multiply */
7666 1, /* casedn_multiply */
7667 1, /* mbminlen */
7668 4, /* mbmaxlen */
7669 1, /* mbmaxlenlen */
7670 9, /* min_sort_char */
7671 0xFFFF, /* max_sort_char */
7672 ' ', /* pad char */
7673 false, /* escape_with_backslash_is_dangerous */
7674 1, /* levels_for_compare */
7675 &my_charset_utf8mb4_handler, /* character set handler */
7676 &my_collation_any_uca_handler, /* collation handler */
7677 PAD_SPACE}; /* pad attribute */
7678
/*
  utf8mb4_croatian_ci collation descriptor. Pairs the `croatian` tailoring
  with my_charset_utf8mb4_handler / my_collation_any_uca_handler; the uca
  pointer is null in this initializer. Initializers are positional, so their
  order has to follow the CHARSET_INFO declaration (not visible here).
*/
7679 CHARSET_INFO my_charset_utf8mb4_croatian_uca_ci = {
7680 245, /* presumably the collation id -- confirm against CHARSET_INFO */
7681 0,
7682 0, /* number */
7683 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7684 MY_UTF8MB4, /* csname */
7685 MY_UTF8MB4 "_croatian_ci", /* m_coll_name */
7686 "UTF-8 Unicode", /* comment */
7687 croatian, /* tailoring */
7688 nullptr, /* coll_param */
7689 ctype_utf8, /* ctype */
7690 nullptr, /* to_lower */
7691 nullptr, /* to_upper */
7692 nullptr, /* sort_order */
7693 nullptr, /* uca */
7694 nullptr, /* tab_to_uni */
7695 nullptr, /* tab_from_uni */
7696 &my_unicase_default, /* caseinfo */
7697 nullptr, /* state_map */
7698 nullptr, /* ident_map */
7699 8, /* strxfrm_multiply */
7700 1, /* caseup_multiply */
7701 1, /* casedn_multiply */
7702 1, /* mbminlen */
7703 4, /* mbmaxlen */
7704 1, /* mbmaxlenlen */
7705 9, /* min_sort_char */
7706 0xFFFF, /* max_sort_char */
7707 ' ', /* pad char */
7708 false, /* escape_with_backslash_is_dangerous */
7709 1, /* levels_for_compare */
7710 &my_charset_utf8mb4_handler, /* character set handler */
7711 &my_collation_any_uca_handler, /* collation handler */
7712 PAD_SPACE}; /* pad attribute */
7713
/*
  utf8mb4_unicode_520_ci collation descriptor. Unlike the language-specific
  utf8mb4 descriptors around it, this one has an empty tailoring string,
  points its uca slot directly at my_uca_v520, uses my_unicase_unicode520 for
  caseinfo, and sets max_sort_char to 0x10FFFF instead of 0xFFFF.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
7714 CHARSET_INFO my_charset_utf8mb4_unicode_520_ci = {
7715 246, /* presumably the collation id -- confirm against CHARSET_INFO */
7716 0,
7717 0, /* number */
7718 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7719 MY_UTF8MB4, /* csname */
7720 MY_UTF8MB4 "_unicode_520_ci", /* m_coll_name */
7721 "UTF-8 Unicode", /* comment */
7722 "", /* tailoring */
7723 nullptr, /* coll_param */
7724 ctype_utf8, /* ctype */
7725 nullptr, /* to_lower */
7726 nullptr, /* to_upper */
7727 nullptr, /* sort_order */
7728 &my_uca_v520, /* uca */
7729 nullptr, /* tab_to_uni */
7730 nullptr, /* tab_from_uni */
7731 &my_unicase_unicode520, /* caseinfo */
7732 nullptr, /* state_map */
7733 nullptr, /* ident_map */
7734 8, /* strxfrm_multiply */
7735 1, /* caseup_multiply */
7736 1, /* casedn_multiply */
7737 1, /* mbminlen */
7738 4, /* mbmaxlen */
7739 1, /* mbmaxlenlen */
7740 9, /* min_sort_char */
7741 0x10FFFF, /* max_sort_char */
7742 ' ', /* pad char */
7743 false, /* escape_with_backslash_is_dangerous */
7744 1, /* levels_for_compare */
7745 &my_charset_utf8mb4_handler, /* character set handler */
7746 &my_collation_any_uca_handler, /* collation handler */
7747 PAD_SPACE}; /* pad attribute */
7748
/*
  utf8mb4_vietnamese_ci collation descriptor. Pairs the `vietnamese`
  tailoring with my_charset_utf8mb4_handler / my_collation_any_uca_handler;
  the uca pointer is null in this initializer. Initializers are positional,
  so their order has to follow the CHARSET_INFO declaration (not visible
  here).
*/
7749 CHARSET_INFO my_charset_utf8mb4_vietnamese_ci = {
7750 247, /* presumably the collation id -- confirm against CHARSET_INFO */
7751 0,
7752 0, /* number */
7753 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7754 MY_UTF8MB4, /* csname */
7755 MY_UTF8MB4 "_vietnamese_ci", /* m_coll_name */
7756 "UTF-8 Unicode", /* comment */
7757 vietnamese, /* tailoring */
7758 nullptr, /* coll_param */
7759 ctype_utf8, /* ctype */
7760 nullptr, /* to_lower */
7761 nullptr, /* to_upper */
7762 nullptr, /* sort_order */
7763 nullptr, /* uca */
7764 nullptr, /* tab_to_uni */
7765 nullptr, /* tab_from_uni */
7766 &my_unicase_default, /* caseinfo */
7767 nullptr, /* state_map */
7768 nullptr, /* ident_map */
7769 8, /* strxfrm_multiply */
7770 1, /* caseup_multiply */
7771 1, /* casedn_multiply */
7772 1, /* mbminlen */
7773 4, /* mbmaxlen */
7774 1, /* mbmaxlenlen */
7775 9, /* min_sort_char */
7776 0xFFFF, /* max_sort_char */
7777 ' ', /* pad char */
7778 false, /* escape_with_backslash_is_dangerous */
7779 1, /* levels_for_compare */
7780 &my_charset_utf8mb4_handler, /* character set handler */
7781 &my_collation_any_uca_handler, /* collation handler */
7782 PAD_SPACE}; /* pad attribute */
7783
/*
  Collation handler table referenced by the utf32 UCA collation descriptors
  in this file. It holds twelve positional entries; only the first is
  labelled in the original, and the order has to follow the
  MY_COLLATION_HANDLER declaration (not visible here). The ninth entry is
  null.
*/
7784 MY_COLLATION_HANDLER my_collation_utf32_uca_handler = {
7785 my_coll_init_uca, /* init */
7786 my_coll_uninit_uca, /* presumably the matching uninit slot -- confirm */
7787 my_strnncoll_any_uca,
7788 my_strnncollsp_any_uca,
7789 my_strnxfrm_any_uca,
7790 my_strnxfrmlen_simple,
7791 my_like_range_generic,
7792 my_wildcmp_uca,
7793 nullptr, /* NOTE(review): no callback supplied for this slot; confirm which one it is */
7794 my_instr_mb,
7795 my_hash_sort_any_uca,
7796 my_propagate_complex};
7797
/* Defined outside this chunk; the utf32 collation descriptors in this file take its address. */
7798 extern MY_CHARSET_HANDLER my_charset_utf32_handler;
7799
/*
  Flag set passed in the `state` slot of every utf32 UCA collation descriptor
  in this file. It ORs five MY_CS_* bits; going by their names they mark the
  collation as compiled-in, strnxfrm-capable, Unicode, supplementary-aware
  and non-ASCII (bit definitions are not visible here -- confirm).
*/
7800 #define MY_CS_UTF32_UCA_FLAGS \
7801 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | \
7802 MY_CS_UNICODE_SUPPLEMENT | MY_CS_NONASCII)
7803
/*
  utf32_unicode_ci collation descriptor. Uses an empty tailoring string with
  my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and uca
  are null in this initializer, and mbminlen == mbmaxlen == 4. Initializers
  are positional, so their order has to follow the CHARSET_INFO declaration
  (not visible here).
*/
7804 CHARSET_INFO my_charset_utf32_unicode_ci = {
7805 160, /* presumably the collation id -- confirm against CHARSET_INFO */
7806 0,
7807 0, /* number */
7808 MY_CS_UTF32_UCA_FLAGS, /* state */
7809 "utf32", /* csname */
7810 "utf32_unicode_ci", /* m_coll_name */
7811 "", /* comment */
7812 "", /* tailoring */
7813 nullptr, /* coll_param */
7814 nullptr, /* ctype */
7815 nullptr, /* to_lower */
7816 nullptr, /* to_upper */
7817 nullptr, /* sort_order */
7818 nullptr, /* uca */
7819 nullptr, /* tab_to_uni */
7820 nullptr, /* tab_from_uni */
7821 &my_unicase_default, /* caseinfo */
7822 nullptr, /* state_map */
7823 nullptr, /* ident_map */
7824 8, /* strxfrm_multiply */
7825 1, /* caseup_multiply */
7826 1, /* casedn_multiply */
7827 4, /* mbminlen */
7828 4, /* mbmaxlen */
7829 1, /* mbmaxlenlen */
7830 9, /* min_sort_char */
7831 0xFFFF, /* max_sort_char */
7832 ' ', /* pad char */
7833 false, /* escape_with_backslash_is_dangerous */
7834 1, /* levels_for_compare */
7835 &my_charset_utf32_handler, /* character set handler */
7836 &my_collation_utf32_uca_handler, /* collation handler */
7837 PAD_SPACE}; /* pad attribute */
7838
/*
  utf32_icelandic_ci collation descriptor. Pairs the `icelandic` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
7839 CHARSET_INFO my_charset_utf32_icelandic_uca_ci = {
7840 161, /* presumably the collation id -- confirm against CHARSET_INFO */
7841 0,
7842 0, /* number */
7843 MY_CS_UTF32_UCA_FLAGS, /* state */
7844 "utf32", /* csname */
7845 "utf32_icelandic_ci", /* m_coll_name */
7846 "", /* comment */
7847 icelandic, /* tailoring */
7848 nullptr, /* coll_param */
7849 nullptr, /* ctype */
7850 nullptr, /* to_lower */
7851 nullptr, /* to_upper */
7852 nullptr, /* sort_order */
7853 nullptr, /* uca */
7854 nullptr, /* tab_to_uni */
7855 nullptr, /* tab_from_uni */
7856 &my_unicase_default, /* caseinfo */
7857 nullptr, /* state_map */
7858 nullptr, /* ident_map */
7859 8, /* strxfrm_multiply */
7860 1, /* caseup_multiply */
7861 1, /* casedn_multiply */
7862 4, /* mbminlen */
7863 4, /* mbmaxlen */
7864 1, /* mbmaxlenlen */
7865 9, /* min_sort_char */
7866 0xFFFF, /* max_sort_char */
7867 ' ', /* pad char */
7868 false, /* escape_with_backslash_is_dangerous */
7869 1, /* levels_for_compare */
7870 &my_charset_utf32_handler, /* character set handler */
7871 &my_collation_utf32_uca_handler, /* collation handler */
7872 PAD_SPACE}; /* pad attribute */
7873
/*
  utf32_latvian_ci collation descriptor. Pairs the `latvian` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
7874 CHARSET_INFO my_charset_utf32_latvian_uca_ci = {
7875 162, /* presumably the collation id -- confirm against CHARSET_INFO */
7876 0,
7877 0, /* number */
7878 MY_CS_UTF32_UCA_FLAGS, /* state */
7879 "utf32", /* csname */
7880 "utf32_latvian_ci", /* m_coll_name */
7881 "", /* comment */
7882 latvian, /* tailoring */
7883 nullptr, /* coll_param */
7884 nullptr, /* ctype */
7885 nullptr, /* to_lower */
7886 nullptr, /* to_upper */
7887 nullptr, /* sort_order */
7888 nullptr, /* uca */
7889 nullptr, /* tab_to_uni */
7890 nullptr, /* tab_from_uni */
7891 &my_unicase_default, /* caseinfo */
7892 nullptr, /* state_map */
7893 nullptr, /* ident_map */
7894 8, /* strxfrm_multiply */
7895 1, /* caseup_multiply */
7896 1, /* casedn_multiply */
7897 4, /* mbminlen */
7898 4, /* mbmaxlen */
7899 1, /* mbmaxlenlen */
7900 9, /* min_sort_char */
7901 0xFFFF, /* max_sort_char */
7902 ' ', /* pad char */
7903 false, /* escape_with_backslash_is_dangerous */
7904 1, /* levels_for_compare */
7905 &my_charset_utf32_handler, /* character set handler */
7906 &my_collation_utf32_uca_handler, /* collation handler */
7907 PAD_SPACE}; /* pad attribute */
7908
/*
  utf32_romanian_ci collation descriptor. Pairs the `romanian` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
7909 CHARSET_INFO my_charset_utf32_romanian_uca_ci = {
7910 163, /* presumably the collation id -- confirm against CHARSET_INFO */
7911 0,
7912 0, /* number */
7913 MY_CS_UTF32_UCA_FLAGS, /* state */
7914 "utf32", /* csname */
7915 "utf32_romanian_ci", /* m_coll_name */
7916 "", /* comment */
7917 romanian, /* tailoring */
7918 nullptr, /* coll_param */
7919 nullptr, /* ctype */
7920 nullptr, /* to_lower */
7921 nullptr, /* to_upper */
7922 nullptr, /* sort_order */
7923 nullptr, /* uca */
7924 nullptr, /* tab_to_uni */
7925 nullptr, /* tab_from_uni */
7926 &my_unicase_default, /* caseinfo */
7927 nullptr, /* state_map */
7928 nullptr, /* ident_map */
7929 8, /* strxfrm_multiply */
7930 1, /* caseup_multiply */
7931 1, /* casedn_multiply */
7932 4, /* mbminlen */
7933 4, /* mbmaxlen */
7934 1, /* mbmaxlenlen */
7935 9, /* min_sort_char */
7936 0xFFFF, /* max_sort_char */
7937 ' ', /* pad char */
7938 false, /* escape_with_backslash_is_dangerous */
7939 1, /* levels_for_compare */
7940 &my_charset_utf32_handler, /* character set handler */
7941 &my_collation_utf32_uca_handler, /* collation handler */
7942 PAD_SPACE}; /* pad attribute */
7943
/*
  utf32_slovenian_ci collation descriptor. Pairs the `slovenian` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
7944 CHARSET_INFO my_charset_utf32_slovenian_uca_ci = {
7945 164, /* presumably the collation id -- confirm against CHARSET_INFO */
7946 0,
7947 0, /* number */
7948 MY_CS_UTF32_UCA_FLAGS, /* state */
7949 "utf32", /* csname */
7950 "utf32_slovenian_ci", /* m_coll_name */
7951 "", /* comment */
7952 slovenian, /* tailoring */
7953 nullptr, /* coll_param */
7954 nullptr, /* ctype */
7955 nullptr, /* to_lower */
7956 nullptr, /* to_upper */
7957 nullptr, /* sort_order */
7958 nullptr, /* uca */
7959 nullptr, /* tab_to_uni */
7960 nullptr, /* tab_from_uni */
7961 &my_unicase_default, /* caseinfo */
7962 nullptr, /* state_map */
7963 nullptr, /* ident_map */
7964 8, /* strxfrm_multiply */
7965 1, /* caseup_multiply */
7966 1, /* casedn_multiply */
7967 4, /* mbminlen */
7968 4, /* mbmaxlen */
7969 1, /* mbmaxlenlen */
7970 9, /* min_sort_char */
7971 0xFFFF, /* max_sort_char */
7972 ' ', /* pad char */
7973 false, /* escape_with_backslash_is_dangerous */
7974 1, /* levels_for_compare */
7975 &my_charset_utf32_handler, /* character set handler */
7976 &my_collation_utf32_uca_handler, /* collation handler */
7977 PAD_SPACE}; /* pad attribute */
7978
/*
  utf32_polish_ci collation descriptor. Pairs the `polish` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
7979 CHARSET_INFO my_charset_utf32_polish_uca_ci = {
7980 165, /* presumably the collation id -- confirm against CHARSET_INFO */
7981 0,
7982 0, /* number */
7983 MY_CS_UTF32_UCA_FLAGS, /* state */
7984 "utf32", /* csname */
7985 "utf32_polish_ci", /* m_coll_name */
7986 "", /* comment */
7987 polish, /* tailoring */
7988 nullptr, /* coll_param */
7989 nullptr, /* ctype */
7990 nullptr, /* to_lower */
7991 nullptr, /* to_upper */
7992 nullptr, /* sort_order */
7993 nullptr, /* uca */
7994 nullptr, /* tab_to_uni */
7995 nullptr, /* tab_from_uni */
7996 &my_unicase_default, /* caseinfo */
7997 nullptr, /* state_map */
7998 nullptr, /* ident_map */
7999 8, /* strxfrm_multiply */
8000 1, /* caseup_multiply */
8001 1, /* casedn_multiply */
8002 4, /* mbminlen */
8003 4, /* mbmaxlen */
8004 1, /* mbmaxlenlen */
8005 9, /* min_sort_char */
8006 0xFFFF, /* max_sort_char */
8007 ' ', /* pad char */
8008 false, /* escape_with_backslash_is_dangerous */
8009 1, /* levels_for_compare */
8010 &my_charset_utf32_handler, /* character set handler */
8011 &my_collation_utf32_uca_handler, /* collation handler */
8012 PAD_SPACE}; /* pad attribute */
8013
/*
  utf32_estonian_ci collation descriptor. Pairs the `estonian` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
8014 CHARSET_INFO my_charset_utf32_estonian_uca_ci = {
8015 166, /* presumably the collation id -- confirm against CHARSET_INFO */
8016 0,
8017 0, /* number */
8018 MY_CS_UTF32_UCA_FLAGS, /* state */
8019 "utf32", /* csname */
8020 "utf32_estonian_ci", /* m_coll_name */
8021 "", /* comment */
8022 estonian, /* tailoring */
8023 nullptr, /* coll_param */
8024 nullptr, /* ctype */
8025 nullptr, /* to_lower */
8026 nullptr, /* to_upper */
8027 nullptr, /* sort_order */
8028 nullptr, /* uca */
8029 nullptr, /* tab_to_uni */
8030 nullptr, /* tab_from_uni */
8031 &my_unicase_default, /* caseinfo */
8032 nullptr, /* state_map */
8033 nullptr, /* ident_map */
8034 8, /* strxfrm_multiply */
8035 1, /* caseup_multiply */
8036 1, /* casedn_multiply */
8037 4, /* mbminlen */
8038 4, /* mbmaxlen */
8039 1, /* mbmaxlenlen */
8040 9, /* min_sort_char */
8041 0xFFFF, /* max_sort_char */
8042 ' ', /* pad char */
8043 false, /* escape_with_backslash_is_dangerous */
8044 1, /* levels_for_compare */
8045 &my_charset_utf32_handler, /* character set handler */
8046 &my_collation_utf32_uca_handler, /* collation handler */
8047 PAD_SPACE}; /* pad attribute */
8048
/*
  utf32_spanish_ci collation descriptor. Pairs the `spanish` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
8049 CHARSET_INFO my_charset_utf32_spanish_uca_ci = {
8050 167, /* presumably the collation id -- confirm against CHARSET_INFO */
8051 0,
8052 0, /* number */
8053 MY_CS_UTF32_UCA_FLAGS, /* state */
8054 "utf32", /* csname */
8055 "utf32_spanish_ci", /* m_coll_name */
8056 "", /* comment */
8057 spanish, /* tailoring */
8058 nullptr, /* coll_param */
8059 nullptr, /* ctype */
8060 nullptr, /* to_lower */
8061 nullptr, /* to_upper */
8062 nullptr, /* sort_order */
8063 nullptr, /* uca */
8064 nullptr, /* tab_to_uni */
8065 nullptr, /* tab_from_uni */
8066 &my_unicase_default, /* caseinfo */
8067 nullptr, /* state_map */
8068 nullptr, /* ident_map */
8069 8, /* strxfrm_multiply */
8070 1, /* caseup_multiply */
8071 1, /* casedn_multiply */
8072 4, /* mbminlen */
8073 4, /* mbmaxlen */
8074 1, /* mbmaxlenlen */
8075 9, /* min_sort_char */
8076 0xFFFF, /* max_sort_char */
8077 ' ', /* pad char */
8078 false, /* escape_with_backslash_is_dangerous */
8079 1, /* levels_for_compare */
8080 &my_charset_utf32_handler, /* character set handler */
8081 &my_collation_utf32_uca_handler, /* collation handler */
8082 PAD_SPACE}; /* pad attribute */
8083
/*
  utf32_swedish_ci collation descriptor. Pairs the `swedish` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
8084 CHARSET_INFO my_charset_utf32_swedish_uca_ci = {
8085 168, /* presumably the collation id -- confirm against CHARSET_INFO */
8086 0,
8087 0, /* number */
8088 MY_CS_UTF32_UCA_FLAGS, /* state */
8089 "utf32", /* csname */
8090 "utf32_swedish_ci", /* m_coll_name */
8091 "", /* comment */
8092 swedish, /* tailoring */
8093 nullptr, /* coll_param */
8094 nullptr, /* ctype */
8095 nullptr, /* to_lower */
8096 nullptr, /* to_upper */
8097 nullptr, /* sort_order */
8098 nullptr, /* uca */
8099 nullptr, /* tab_to_uni */
8100 nullptr, /* tab_from_uni */
8101 &my_unicase_default, /* caseinfo */
8102 nullptr, /* state_map */
8103 nullptr, /* ident_map */
8104 8, /* strxfrm_multiply */
8105 1, /* caseup_multiply */
8106 1, /* casedn_multiply */
8107 4, /* mbminlen */
8108 4, /* mbmaxlen */
8109 1, /* mbmaxlenlen */
8110 9, /* min_sort_char */
8111 0xFFFF, /* max_sort_char */
8112 ' ', /* pad char */
8113 false, /* escape_with_backslash_is_dangerous */
8114 1, /* levels_for_compare */
8115 &my_charset_utf32_handler, /* character set handler */
8116 &my_collation_utf32_uca_handler, /* collation handler */
8117 PAD_SPACE}; /* pad attribute */
8118
/*
  utf32_turkish_ci collation descriptor. Pairs the `turkish` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4. caseinfo
  is my_unicase_turkish rather than my_unicase_default, while the
  caseup/casedn multipliers stay at 1 (the utf8mb4 Turkish descriptor in
  this file uses 2). Initializers are positional, so their order has to
  follow the CHARSET_INFO declaration (not visible here).
*/
8119 CHARSET_INFO my_charset_utf32_turkish_uca_ci = {
8120 169, /* presumably the collation id -- confirm against CHARSET_INFO */
8121 0,
8122 0, /* number */
8123 MY_CS_UTF32_UCA_FLAGS, /* state */
8124 "utf32", /* csname */
8125 "utf32_turkish_ci", /* m_coll_name */
8126 "", /* comment */
8127 turkish, /* tailoring */
8128 nullptr, /* coll_param */
8129 nullptr, /* ctype */
8130 nullptr, /* to_lower */
8131 nullptr, /* to_upper */
8132 nullptr, /* sort_order */
8133 nullptr, /* uca */
8134 nullptr, /* tab_to_uni */
8135 nullptr, /* tab_from_uni */
8136 &my_unicase_turkish, /* caseinfo */
8137 nullptr, /* state_map */
8138 nullptr, /* ident_map */
8139 8, /* strxfrm_multiply */
8140 1, /* caseup_multiply */
8141 1, /* casedn_multiply */
8142 4, /* mbminlen */
8143 4, /* mbmaxlen */
8144 1, /* mbmaxlenlen */
8145 9, /* min_sort_char */
8146 0xFFFF, /* max_sort_char */
8147 ' ', /* pad char */
8148 false, /* escape_with_backslash_is_dangerous */
8149 1, /* levels_for_compare */
8150 &my_charset_utf32_handler, /* character set handler */
8151 &my_collation_utf32_uca_handler, /* collation handler */
8152 PAD_SPACE}; /* pad attribute */
8153
/*
  utf32_czech_ci collation descriptor. Pairs the `czech` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
8154 CHARSET_INFO my_charset_utf32_czech_uca_ci = {
8155 170, /* presumably the collation id -- confirm against CHARSET_INFO */
8156 0,
8157 0, /* number */
8158 MY_CS_UTF32_UCA_FLAGS, /* state */
8159 "utf32", /* csname */
8160 "utf32_czech_ci", /* m_coll_name */
8161 "", /* comment */
8162 czech, /* tailoring */
8163 nullptr, /* coll_param */
8164 nullptr, /* ctype */
8165 nullptr, /* to_lower */
8166 nullptr, /* to_upper */
8167 nullptr, /* sort_order */
8168 nullptr, /* uca */
8169 nullptr, /* tab_to_uni */
8170 nullptr, /* tab_from_uni */
8171 &my_unicase_default, /* caseinfo */
8172 nullptr, /* state_map */
8173 nullptr, /* ident_map */
8174 8, /* strxfrm_multiply */
8175 1, /* caseup_multiply */
8176 1, /* casedn_multiply */
8177 4, /* mbminlen */
8178 4, /* mbmaxlen */
8179 1, /* mbmaxlenlen */
8180 9, /* min_sort_char */
8181 0xFFFF, /* max_sort_char */
8182 ' ', /* pad char */
8183 false, /* escape_with_backslash_is_dangerous */
8184 1, /* levels_for_compare */
8185 &my_charset_utf32_handler, /* character set handler */
8186 &my_collation_utf32_uca_handler, /* collation handler */
8187 PAD_SPACE}; /* pad attribute */
8188
/*
  utf32_danish_ci collation descriptor. Pairs the `danish` tailoring
  with my_charset_utf32_handler / my_collation_utf32_uca_handler; ctype and
  uca are null in this initializer, and mbminlen == mbmaxlen == 4.
  Initializers are positional, so their order has to follow the CHARSET_INFO
  declaration (not visible here).
*/
8189 CHARSET_INFO my_charset_utf32_danish_uca_ci = {
8190 171, /* presumably the collation id -- confirm against CHARSET_INFO */
8191 0,
8192 0, /* number */
8193 MY_CS_UTF32_UCA_FLAGS, /* state */
8194 "utf32", /* csname */
8195 "utf32_danish_ci", /* m_coll_name */
8196 "", /* comment */
8197 danish, /* tailoring */
8198 nullptr, /* coll_param */
8199 nullptr, /* ctype */
8200 nullptr, /* to_lower */
8201 nullptr, /* to_upper */
8202 nullptr, /* sort_order */
8203 nullptr, /* uca */
8204 nullptr, /* tab_to_uni */
8205 nullptr, /* tab_from_uni */
8206 &my_unicase_default, /* caseinfo */
8207 nullptr, /* state_map */
8208 nullptr, /* ident_map */
8209 8, /* strxfrm_multiply */
8210 1, /* caseup_multiply */
8211 1, /* casedn_multiply */
8212 4, /* mbminlen */
8213 4, /* mbmaxlen */
8214 1, /* mbmaxlenlen */
8215 9, /* min_sort_char */
8216 0xFFFF, /* max_sort_char */
8217 ' ', /* pad char */
8218 false, /* escape_with_backslash_is_dangerous */
8219 1, /* levels_for_compare */
8220 &my_charset_utf32_handler, /* character set handler */
8221 &my_collation_utf32_uca_handler, /* collation handler */
8222 PAD_SPACE}; /* pad attribute */
8223
8224 CHARSET_INFO my_charset_utf32_lithuanian_uca_ci = { /* utf32 collation "utf32_lithuanian_ci": UCA collation handler with the lithuanian tailoring */
8225 172,
8226 0,
8227 0, /* number */
8228 MY_CS_UTF32_UCA_FLAGS, /* state */
8229 "utf32", /* csname */
8230 "utf32_lithuanian_ci", /* m_coll_name */
8231 "", /* comment */
8232 lithuanian, /* tailoring */
8233 nullptr, /* coll_param */
8234 nullptr, /* ctype */
8235 nullptr, /* to_lower */
8236 nullptr, /* to_upper */
8237 nullptr, /* sort_order */
8238 nullptr, /* uca */
8239 nullptr, /* tab_to_uni */
8240 nullptr, /* tab_from_uni */
8241 &my_unicase_default, /* caseinfo */
8242 nullptr, /* state_map */
8243 nullptr, /* ident_map */
8244 8, /* strxfrm_multiply */
8245 1, /* caseup_multiply */
8246 1, /* casedn_multiply */
8247 4, /* mbminlen */
8248 4, /* mbmaxlen */
8249 1, /* mbmaxlenlen */
8250 9, /* min_sort_char */
8251 0xFFFF, /* max_sort_char */
8252 ' ', /* pad char */
8253 false, /* escape_with_backslash_is_dangerous */
8254 1, /* levels_for_compare */
8255 &my_charset_utf32_handler, /* charset handler */
8256 &my_collation_utf32_uca_handler, /* collation handler */
8257 PAD_SPACE}; /* pad attribute */
8258
8259 CHARSET_INFO my_charset_utf32_slovak_uca_ci = { /* utf32 collation "utf32_slovak_ci": UCA collation handler with the slovak tailoring */
8260 173,
8261 0,
8262 0, /* number */
8263 MY_CS_UTF32_UCA_FLAGS, /* state */
8264 "utf32", /* csname */
8265 "utf32_slovak_ci", /* m_coll_name */
8266 "", /* comment */
8267 slovak, /* tailoring */
8268 nullptr, /* coll_param */
8269 nullptr, /* ctype */
8270 nullptr, /* to_lower */
8271 nullptr, /* to_upper */
8272 nullptr, /* sort_order */
8273 nullptr, /* uca */
8274 nullptr, /* tab_to_uni */
8275 nullptr, /* tab_from_uni */
8276 &my_unicase_default, /* caseinfo */
8277 nullptr, /* state_map */
8278 nullptr, /* ident_map */
8279 8, /* strxfrm_multiply */
8280 1, /* caseup_multiply */
8281 1, /* casedn_multiply */
8282 4, /* mbminlen */
8283 4, /* mbmaxlen */
8284 1, /* mbmaxlenlen */
8285 9, /* min_sort_char */
8286 0xFFFF, /* max_sort_char */
8287 ' ', /* pad char */
8288 false, /* escape_with_backslash_is_dangerous */
8289 1, /* levels_for_compare */
8290 &my_charset_utf32_handler, /* charset handler */
8291 &my_collation_utf32_uca_handler, /* collation handler */
8292 PAD_SPACE}; /* pad attribute */
8293
8294 CHARSET_INFO my_charset_utf32_spanish2_uca_ci = { /* utf32 collation "utf32_spanish2_ci": UCA collation handler with the spanish2 tailoring */
8295 174,
8296 0,
8297 0, /* number */
8298 MY_CS_UTF32_UCA_FLAGS, /* state */
8299 "utf32", /* csname */
8300 "utf32_spanish2_ci", /* m_coll_name */
8301 "", /* comment */
8302 spanish2, /* tailoring */
8303 nullptr, /* coll_param */
8304 nullptr, /* ctype */
8305 nullptr, /* to_lower */
8306 nullptr, /* to_upper */
8307 nullptr, /* sort_order */
8308 nullptr, /* uca */
8309 nullptr, /* tab_to_uni */
8310 nullptr, /* tab_from_uni */
8311 &my_unicase_default, /* caseinfo */
8312 nullptr, /* state_map */
8313 nullptr, /* ident_map */
8314 8, /* strxfrm_multiply */
8315 1, /* caseup_multiply */
8316 1, /* casedn_multiply */
8317 4, /* mbminlen */
8318 4, /* mbmaxlen */
8319 1, /* mbmaxlenlen */
8320 9, /* min_sort_char */
8321 0xFFFF, /* max_sort_char */
8322 ' ', /* pad char */
8323 false, /* escape_with_backslash_is_dangerous */
8324 1, /* levels_for_compare */
8325 &my_charset_utf32_handler, /* charset handler */
8326 &my_collation_utf32_uca_handler, /* collation handler */
8327 PAD_SPACE}; /* pad attribute */
8328
8329 CHARSET_INFO my_charset_utf32_roman_uca_ci = { /* utf32 collation "utf32_roman_ci": UCA collation handler with the roman tailoring */
8330 175,
8331 0,
8332 0, /* number */
8333 MY_CS_UTF32_UCA_FLAGS, /* state */
8334 "utf32", /* csname */
8335 "utf32_roman_ci", /* m_coll_name */
8336 "", /* comment */
8337 roman, /* tailoring */
8338 nullptr, /* coll_param */
8339 nullptr, /* ctype */
8340 nullptr, /* to_lower */
8341 nullptr, /* to_upper */
8342 nullptr, /* sort_order */
8343 nullptr, /* uca */
8344 nullptr, /* tab_to_uni */
8345 nullptr, /* tab_from_uni */
8346 &my_unicase_default, /* caseinfo */
8347 nullptr, /* state_map */
8348 nullptr, /* ident_map */
8349 8, /* strxfrm_multiply */
8350 1, /* caseup_multiply */
8351 1, /* casedn_multiply */
8352 4, /* mbminlen */
8353 4, /* mbmaxlen */
8354 1, /* mbmaxlenlen */
8355 9, /* min_sort_char */
8356 0xFFFF, /* max_sort_char */
8357 ' ', /* pad char */
8358 false, /* escape_with_backslash_is_dangerous */
8359 1, /* levels_for_compare */
8360 &my_charset_utf32_handler, /* charset handler */
8361 &my_collation_utf32_uca_handler, /* collation handler */
8362 PAD_SPACE}; /* pad attribute */
8363
8364 CHARSET_INFO my_charset_utf32_persian_uca_ci = { /* utf32 collation "utf32_persian_ci": UCA collation handler with the persian tailoring */
8365 176,
8366 0,
8367 0, /* number */
8368 MY_CS_UTF32_UCA_FLAGS, /* state */
8369 "utf32", /* csname */
8370 "utf32_persian_ci", /* m_coll_name */
8371 "", /* comment */
8372 persian, /* tailoring */
8373 nullptr, /* coll_param */
8374 nullptr, /* ctype */
8375 nullptr, /* to_lower */
8376 nullptr, /* to_upper */
8377 nullptr, /* sort_order */
8378 nullptr, /* uca */
8379 nullptr, /* tab_to_uni */
8380 nullptr, /* tab_from_uni */
8381 &my_unicase_default, /* caseinfo */
8382 nullptr, /* state_map */
8383 nullptr, /* ident_map */
8384 8, /* strxfrm_multiply */
8385 1, /* caseup_multiply */
8386 1, /* casedn_multiply */
8387 4, /* mbminlen */
8388 4, /* mbmaxlen */
8389 1, /* mbmaxlenlen */
8390 9, /* min_sort_char */
8391 0xFFFF, /* max_sort_char */
8392 ' ', /* pad char */
8393 false, /* escape_with_backslash_is_dangerous */
8394 1, /* levels_for_compare */
8395 &my_charset_utf32_handler, /* charset handler */
8396 &my_collation_utf32_uca_handler, /* collation handler */
8397 PAD_SPACE}; /* pad attribute */
8398
8399 CHARSET_INFO my_charset_utf32_esperanto_uca_ci = { /* utf32 collation "utf32_esperanto_ci": UCA collation handler with the esperanto tailoring */
8400 177,
8401 0,
8402 0, /* number */
8403 MY_CS_UTF32_UCA_FLAGS, /* state */
8404 "utf32", /* csname */
8405 "utf32_esperanto_ci", /* m_coll_name */
8406 "", /* comment */
8407 esperanto, /* tailoring */
8408 nullptr, /* coll_param */
8409 nullptr, /* ctype */
8410 nullptr, /* to_lower */
8411 nullptr, /* to_upper */
8412 nullptr, /* sort_order */
8413 nullptr, /* uca */
8414 nullptr, /* tab_to_uni */
8415 nullptr, /* tab_from_uni */
8416 &my_unicase_default, /* caseinfo */
8417 nullptr, /* state_map */
8418 nullptr, /* ident_map */
8419 8, /* strxfrm_multiply */
8420 1, /* caseup_multiply */
8421 1, /* casedn_multiply */
8422 4, /* mbminlen */
8423 4, /* mbmaxlen */
8424 1, /* mbmaxlenlen */
8425 9, /* min_sort_char */
8426 0xFFFF, /* max_sort_char */
8427 ' ', /* pad char */
8428 false, /* escape_with_backslash_is_dangerous */
8429 1, /* levels_for_compare */
8430 &my_charset_utf32_handler, /* charset handler */
8431 &my_collation_utf32_uca_handler, /* collation handler */
8432 PAD_SPACE}; /* pad attribute */
8433
8434 CHARSET_INFO my_charset_utf32_hungarian_uca_ci = { /* utf32 collation "utf32_hungarian_ci": UCA collation handler with the hungarian tailoring */
8435 178,
8436 0,
8437 0, /* number */
8438 MY_CS_UTF32_UCA_FLAGS, /* state */
8439 "utf32", /* csname */
8440 "utf32_hungarian_ci", /* m_coll_name */
8441 "", /* comment */
8442 hungarian, /* tailoring */
8443 nullptr, /* coll_param */
8444 nullptr, /* ctype */
8445 nullptr, /* to_lower */
8446 nullptr, /* to_upper */
8447 nullptr, /* sort_order */
8448 nullptr, /* uca */
8449 nullptr, /* tab_to_uni */
8450 nullptr, /* tab_from_uni */
8451 &my_unicase_default, /* caseinfo */
8452 nullptr, /* state_map */
8453 nullptr, /* ident_map */
8454 8, /* strxfrm_multiply */
8455 1, /* caseup_multiply */
8456 1, /* casedn_multiply */
8457 4, /* mbminlen */
8458 4, /* mbmaxlen */
8459 1, /* mbmaxlenlen */
8460 9, /* min_sort_char */
8461 0xFFFF, /* max_sort_char */
8462 ' ', /* pad char */
8463 false, /* escape_with_backslash_is_dangerous */
8464 1, /* levels_for_compare */
8465 &my_charset_utf32_handler, /* charset handler */
8466 &my_collation_utf32_uca_handler, /* collation handler */
8467 PAD_SPACE}; /* pad attribute */
8468
8469 CHARSET_INFO my_charset_utf32_sinhala_uca_ci = { /* utf32 collation "utf32_sinhala_ci": UCA collation handler with the sinhala tailoring */
8470 179,
8471 0,
8472 0, /* number */
8473 MY_CS_UTF32_UCA_FLAGS, /* state */
8474 "utf32", /* csname */
8475 "utf32_sinhala_ci", /* m_coll_name */
8476 "", /* comment */
8477 sinhala, /* tailoring */
8478 nullptr, /* coll_param */
8479 nullptr, /* ctype */
8480 nullptr, /* to_lower */
8481 nullptr, /* to_upper */
8482 nullptr, /* sort_order */
8483 nullptr, /* uca */
8484 nullptr, /* tab_to_uni */
8485 nullptr, /* tab_from_uni */
8486 &my_unicase_default, /* caseinfo */
8487 nullptr, /* state_map */
8488 nullptr, /* ident_map */
8489 8, /* strxfrm_multiply */
8490 1, /* caseup_multiply */
8491 1, /* casedn_multiply */
8492 4, /* mbminlen */
8493 4, /* mbmaxlen */
8494 1, /* mbmaxlenlen */
8495 9, /* min_sort_char */
8496 0xFFFF, /* max_sort_char */
8497 ' ', /* pad char */
8498 false, /* escape_with_backslash_is_dangerous */
8499 1, /* levels_for_compare */
8500 &my_charset_utf32_handler, /* charset handler */
8501 &my_collation_utf32_uca_handler, /* collation handler */
8502 PAD_SPACE}; /* pad attribute */
8503
8504 CHARSET_INFO my_charset_utf32_german2_uca_ci = { /* utf32 collation "utf32_german2_ci": UCA collation handler with the german2 tailoring */
8505 180,
8506 0,
8507 0, /* number */
8508 MY_CS_UTF32_UCA_FLAGS, /* state */
8509 "utf32", /* csname */
8510 "utf32_german2_ci", /* m_coll_name */
8511 "", /* comment */
8512 german2, /* tailoring */
8513 nullptr, /* coll_param */
8514 nullptr, /* ctype */
8515 nullptr, /* to_lower */
8516 nullptr, /* to_upper */
8517 nullptr, /* sort_order */
8518 nullptr, /* uca */
8519 nullptr, /* tab_to_uni */
8520 nullptr, /* tab_from_uni */
8521 &my_unicase_default, /* caseinfo */
8522 nullptr, /* state_map */
8523 nullptr, /* ident_map */
8524 8, /* strxfrm_multiply */
8525 1, /* caseup_multiply */
8526 1, /* casedn_multiply */
8527 4, /* mbminlen */
8528 4, /* mbmaxlen */
8529 1, /* mbmaxlenlen */
8530 9, /* min_sort_char */
8531 0xFFFF, /* max_sort_char */
8532 ' ', /* pad char */
8533 false, /* escape_with_backslash_is_dangerous */
8534 1, /* levels_for_compare */
8535 &my_charset_utf32_handler, /* charset handler */
8536 &my_collation_utf32_uca_handler, /* collation handler */
8537 PAD_SPACE}; /* pad attribute */
8538
8539 CHARSET_INFO my_charset_utf32_croatian_uca_ci = { /* utf32 collation "utf32_croatian_ci": UCA collation handler with the croatian tailoring */
8540 181,
8541 0,
8542 0, /* number */
8543 MY_CS_UTF32_UCA_FLAGS, /* state */
8544 "utf32", /* csname */
8545 "utf32_croatian_ci", /* m_coll_name */
8546 "", /* comment */
8547 croatian, /* tailoring */
8548 nullptr, /* coll_param */
8549 nullptr, /* ctype */
8550 nullptr, /* to_lower */
8551 nullptr, /* to_upper */
8552 nullptr, /* sort_order */
8553 nullptr, /* uca */
8554 nullptr, /* tab_to_uni */
8555 nullptr, /* tab_from_uni */
8556 &my_unicase_default, /* caseinfo */
8557 nullptr, /* state_map */
8558 nullptr, /* ident_map */
8559 8, /* strxfrm_multiply */
8560 1, /* caseup_multiply */
8561 1, /* casedn_multiply */
8562 4, /* mbminlen */
8563 4, /* mbmaxlen */
8564 1, /* mbmaxlenlen */
8565 9, /* min_sort_char */
8566 0xFFFF, /* max_sort_char */
8567 ' ', /* pad char */
8568 false, /* escape_with_backslash_is_dangerous */
8569 1, /* levels_for_compare */
8570 &my_charset_utf32_handler, /* charset handler */
8571 &my_collation_utf32_uca_handler, /* collation handler */
8572 PAD_SPACE}; /* pad attribute */
8573
8574 CHARSET_INFO my_charset_utf32_unicode_520_ci = { /* utf32 collation "utf32_unicode_520_ci": UCA collation handler, empty tailoring, uca = my_uca_v520 */
8575 182,
8576 0,
8577 0, /* number */
8578 MY_CS_UTF32_UCA_FLAGS, /* state */
8579 "utf32", /* csname */
8580 "utf32_unicode_520_ci", /* m_coll_name */
8581 "", /* comment */
8582 "", /* tailoring */
8583 nullptr, /* coll_param */
8584 nullptr, /* ctype */
8585 nullptr, /* to_lower */
8586 nullptr, /* to_upper */
8587 nullptr, /* sort_order */
8588 &my_uca_v520, /* uca */
8589 nullptr, /* tab_to_uni */
8590 nullptr, /* tab_from_uni */
8591 &my_unicase_unicode520, /* caseinfo */
8592 nullptr, /* state_map */
8593 nullptr, /* ident_map */
8594 8, /* strxfrm_multiply */
8595 1, /* caseup_multiply */
8596 1, /* casedn_multiply */
8597 4, /* mbminlen */
8598 4, /* mbmaxlen */
8599 1, /* mbmaxlenlen */
8600 9, /* min_sort_char */
8601 0x10FFFF, /* max_sort_char */
8602 ' ', /* pad char */
8603 false, /* escape_with_backslash_is_dangerous */
8604 1, /* levels_for_compare */
8605 &my_charset_utf32_handler, /* charset handler */
8606 &my_collation_utf32_uca_handler, /* collation handler */
8607 PAD_SPACE}; /* pad attribute */
8608
8609 CHARSET_INFO my_charset_utf32_vietnamese_ci = { /* utf32 collation "utf32_vietnamese_ci": UCA collation handler with the vietnamese tailoring */
8610 183,
8611 0,
8612 0, /* number */
8613 MY_CS_UTF32_UCA_FLAGS, /* state */
8614 "utf32", /* csname */
8615 "utf32_vietnamese_ci", /* m_coll_name */
8616 "", /* comment */
8617 vietnamese, /* tailoring */
8618 nullptr, /* coll_param */
8619 nullptr, /* ctype */
8620 nullptr, /* to_lower */
8621 nullptr, /* to_upper */
8622 nullptr, /* sort_order */
8623 nullptr, /* uca */
8624 nullptr, /* tab_to_uni */
8625 nullptr, /* tab_from_uni */
8626 &my_unicase_default, /* caseinfo */
8627 nullptr, /* state_map */
8628 nullptr, /* ident_map */
8629 8, /* strxfrm_multiply */
8630 1, /* caseup_multiply */
8631 1, /* casedn_multiply */
8632 4, /* mbminlen */
8633 4, /* mbmaxlen */
8634 1, /* mbmaxlenlen */
8635 9, /* min_sort_char */
8636 0xFFFF, /* max_sort_char */
8637 ' ', /* pad char */
8638 false, /* escape_with_backslash_is_dangerous */
8639 1, /* levels_for_compare */
8640 &my_charset_utf32_handler, /* charset handler */
8641 &my_collation_utf32_uca_handler, /* collation handler */
8642 PAD_SPACE}; /* pad attribute */
8643
8644 MY_COLLATION_HANDLER my_collation_utf16_uca_handler = { /* collation handler table referenced by the utf16 UCA CHARSET_INFO entries; NOTE(review): slot labels other than "init" are inferred from the function names -- confirm against the MY_COLLATION_HANDLER declaration */
8645 my_coll_init_uca, /* init */
8646 my_coll_uninit_uca, /* uninit */
8647 my_strnncoll_any_uca, /* strnncoll */
8648 my_strnncollsp_any_uca, /* strnncollsp */
8649 my_strnxfrm_any_uca, /* strnxfrm */
8650 my_strnxfrmlen_simple, /* strnxfrmlen */
8651 my_like_range_generic, /* like_range */
8652 my_wildcmp_uca, /* wildcmp */
8653 nullptr, /* NOTE(review): slot left unset; its purpose is not visible here */
8654 my_instr_mb, /* instr */
8655 my_hash_sort_any_uca, /* hash_sort */
8656 my_propagate_complex}; /* propagate */
8657
8658 extern MY_CHARSET_HANDLER my_charset_utf16_handler; /* defined outside this chunk; referenced by the utf16 CHARSET_INFO entries */
8659
8660 #define MY_CS_UTF16_UCA_FLAGS \
8661 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII) /* state flags used by the utf16 UCA collations */
8662
8663 CHARSET_INFO my_charset_utf16_unicode_ci = { /* utf16 collation "utf16_unicode_ci": UCA collation handler with an empty tailoring */
8664 101,
8665 0,
8666 0, /* number */
8667 MY_CS_UTF16_UCA_FLAGS, /* state */
8668 "utf16", /* csname */
8669 "utf16_unicode_ci", /* m_coll_name */
8670 "", /* comment */
8671 "", /* tailoring */
8672 nullptr, /* coll_param */
8673 nullptr, /* ctype */
8674 nullptr, /* to_lower */
8675 nullptr, /* to_upper */
8676 nullptr, /* sort_order */
8677 nullptr, /* uca */
8678 nullptr, /* tab_to_uni */
8679 nullptr, /* tab_from_uni */
8680 &my_unicase_default, /* caseinfo */
8681 nullptr, /* state_map */
8682 nullptr, /* ident_map */
8683 8, /* strxfrm_multiply */
8684 1, /* caseup_multiply */
8685 1, /* casedn_multiply */
8686 2, /* mbminlen */
8687 4, /* mbmaxlen */
8688 1, /* mbmaxlenlen */
8689 9, /* min_sort_char */
8690 0xFFFF, /* max_sort_char */
8691 ' ', /* pad char */
8692 false, /* escape_with_backslash_is_dangerous */
8693 1, /* levels_for_compare */
8694 &my_charset_utf16_handler, /* charset handler */
8695 &my_collation_utf16_uca_handler, /* collation handler */
8696 PAD_SPACE}; /* pad attribute */
8697
8698 CHARSET_INFO my_charset_utf16_icelandic_uca_ci = { /* utf16 collation "utf16_icelandic_ci": UCA collation handler with the icelandic tailoring */
8699 102,
8700 0,
8701 0, /* number */
8702 MY_CS_UTF16_UCA_FLAGS, /* state */
8703 "utf16", /* csname */
8704 "utf16_icelandic_ci", /* m_coll_name */
8705 "", /* comment */
8706 icelandic, /* tailoring */
8707 nullptr, /* coll_param */
8708 nullptr, /* ctype */
8709 nullptr, /* to_lower */
8710 nullptr, /* to_upper */
8711 nullptr, /* sort_order */
8712 nullptr, /* uca */
8713 nullptr, /* tab_to_uni */
8714 nullptr, /* tab_from_uni */
8715 &my_unicase_default, /* caseinfo */
8716 nullptr, /* state_map */
8717 nullptr, /* ident_map */
8718 8, /* strxfrm_multiply */
8719 1, /* caseup_multiply */
8720 1, /* casedn_multiply */
8721 2, /* mbminlen */
8722 4, /* mbmaxlen */
8723 1, /* mbmaxlenlen */
8724 9, /* min_sort_char */
8725 0xFFFF, /* max_sort_char */
8726 ' ', /* pad char */
8727 false, /* escape_with_backslash_is_dangerous */
8728 1, /* levels_for_compare */
8729 &my_charset_utf16_handler, /* charset handler */
8730 &my_collation_utf16_uca_handler, /* collation handler */
8731 PAD_SPACE}; /* pad attribute */
8732
8733 CHARSET_INFO my_charset_utf16_latvian_uca_ci = { /* utf16 collation "utf16_latvian_ci": UCA collation handler with the latvian tailoring */
8734 103,
8735 0,
8736 0, /* number */
8737 MY_CS_UTF16_UCA_FLAGS, /* state */
8738 "utf16", /* csname */
8739 "utf16_latvian_ci", /* m_coll_name */
8740 "", /* comment */
8741 latvian, /* tailoring */
8742 nullptr, /* coll_param */
8743 nullptr, /* ctype */
8744 nullptr, /* to_lower */
8745 nullptr, /* to_upper */
8746 nullptr, /* sort_order */
8747 nullptr, /* uca */
8748 nullptr, /* tab_to_uni */
8749 nullptr, /* tab_from_uni */
8750 &my_unicase_default, /* caseinfo */
8751 nullptr, /* state_map */
8752 nullptr, /* ident_map */
8753 8, /* strxfrm_multiply */
8754 1, /* caseup_multiply */
8755 1, /* casedn_multiply */
8756 2, /* mbminlen */
8757 4, /* mbmaxlen */
8758 1, /* mbmaxlenlen */
8759 9, /* min_sort_char */
8760 0xFFFF, /* max_sort_char */
8761 ' ', /* pad char */
8762 false, /* escape_with_backslash_is_dangerous */
8763 1, /* levels_for_compare */
8764 &my_charset_utf16_handler, /* charset handler */
8765 &my_collation_utf16_uca_handler, /* collation handler */
8766 PAD_SPACE}; /* pad attribute */
8767
8768 CHARSET_INFO my_charset_utf16_romanian_uca_ci = { /* utf16 collation "utf16_romanian_ci": UCA collation handler with the romanian tailoring */
8769 104,
8770 0,
8771 0, /* number */
8772 MY_CS_UTF16_UCA_FLAGS, /* state */
8773 "utf16", /* csname */
8774 "utf16_romanian_ci", /* m_coll_name */
8775 "", /* comment */
8776 romanian, /* tailoring */
8777 nullptr, /* coll_param */
8778 nullptr, /* ctype */
8779 nullptr, /* to_lower */
8780 nullptr, /* to_upper */
8781 nullptr, /* sort_order */
8782 nullptr, /* uca */
8783 nullptr, /* tab_to_uni */
8784 nullptr, /* tab_from_uni */
8785 &my_unicase_default, /* caseinfo */
8786 nullptr, /* state_map */
8787 nullptr, /* ident_map */
8788 8, /* strxfrm_multiply */
8789 1, /* caseup_multiply */
8790 1, /* casedn_multiply */
8791 2, /* mbminlen */
8792 4, /* mbmaxlen */
8793 1, /* mbmaxlenlen */
8794 9, /* min_sort_char */
8795 0xFFFF, /* max_sort_char */
8796 ' ', /* pad char */
8797 false, /* escape_with_backslash_is_dangerous */
8798 1, /* levels_for_compare */
8799 &my_charset_utf16_handler, /* charset handler */
8800 &my_collation_utf16_uca_handler, /* collation handler */
8801 PAD_SPACE}; /* pad attribute */
8802
8803 CHARSET_INFO my_charset_utf16_slovenian_uca_ci = { /* utf16 collation "utf16_slovenian_ci": UCA collation handler with the slovenian tailoring */
8804 105,
8805 0,
8806 0, /* number */
8807 MY_CS_UTF16_UCA_FLAGS, /* state */
8808 "utf16", /* csname */
8809 "utf16_slovenian_ci", /* m_coll_name */
8810 "", /* comment */
8811 slovenian, /* tailoring */
8812 nullptr, /* coll_param */
8813 nullptr, /* ctype */
8814 nullptr, /* to_lower */
8815 nullptr, /* to_upper */
8816 nullptr, /* sort_order */
8817 nullptr, /* uca */
8818 nullptr, /* tab_to_uni */
8819 nullptr, /* tab_from_uni */
8820 &my_unicase_default, /* caseinfo */
8821 nullptr, /* state_map */
8822 nullptr, /* ident_map */
8823 8, /* strxfrm_multiply */
8824 1, /* caseup_multiply */
8825 1, /* casedn_multiply */
8826 2, /* mbminlen */
8827 4, /* mbmaxlen */
8828 1, /* mbmaxlenlen */
8829 9, /* min_sort_char */
8830 0xFFFF, /* max_sort_char */
8831 ' ', /* pad char */
8832 false, /* escape_with_backslash_is_dangerous */
8833 1, /* levels_for_compare */
8834 &my_charset_utf16_handler, /* charset handler */
8835 &my_collation_utf16_uca_handler, /* collation handler */
8836 PAD_SPACE}; /* pad attribute */
8837
8838 CHARSET_INFO my_charset_utf16_polish_uca_ci = { /* utf16 collation "utf16_polish_ci": UCA collation handler with the polish tailoring */
8839 106,
8840 0,
8841 0, /* number */
8842 MY_CS_UTF16_UCA_FLAGS, /* state */
8843 "utf16", /* csname */
8844 "utf16_polish_ci", /* m_coll_name */
8845 "", /* comment */
8846 polish, /* tailoring */
8847 nullptr, /* coll_param */
8848 nullptr, /* ctype */
8849 nullptr, /* to_lower */
8850 nullptr, /* to_upper */
8851 nullptr, /* sort_order */
8852 nullptr, /* uca */
8853 nullptr, /* tab_to_uni */
8854 nullptr, /* tab_from_uni */
8855 &my_unicase_default, /* caseinfo */
8856 nullptr, /* state_map */
8857 nullptr, /* ident_map */
8858 8, /* strxfrm_multiply */
8859 1, /* caseup_multiply */
8860 1, /* casedn_multiply */
8861 2, /* mbminlen */
8862 4, /* mbmaxlen */
8863 1, /* mbmaxlenlen */
8864 9, /* min_sort_char */
8865 0xFFFF, /* max_sort_char */
8866 ' ', /* pad char */
8867 false, /* escape_with_backslash_is_dangerous */
8868 1, /* levels_for_compare */
8869 &my_charset_utf16_handler, /* charset handler */
8870 &my_collation_utf16_uca_handler, /* collation handler */
8871 PAD_SPACE}; /* pad attribute */
8872
8873 CHARSET_INFO my_charset_utf16_estonian_uca_ci = { /* utf16 collation "utf16_estonian_ci": UCA collation handler with the estonian tailoring */
8874 107,
8875 0,
8876 0, /* number */
8877 MY_CS_UTF16_UCA_FLAGS, /* state */
8878 "utf16", /* csname */
8879 "utf16_estonian_ci", /* m_coll_name */
8880 "", /* comment */
8881 estonian, /* tailoring */
8882 nullptr, /* coll_param */
8883 nullptr, /* ctype */
8884 nullptr, /* to_lower */
8885 nullptr, /* to_upper */
8886 nullptr, /* sort_order */
8887 nullptr, /* uca */
8888 nullptr, /* tab_to_uni */
8889 nullptr, /* tab_from_uni */
8890 &my_unicase_default, /* caseinfo */
8891 nullptr, /* state_map */
8892 nullptr, /* ident_map */
8893 8, /* strxfrm_multiply */
8894 1, /* caseup_multiply */
8895 1, /* casedn_multiply */
8896 2, /* mbminlen */
8897 4, /* mbmaxlen */
8898 1, /* mbmaxlenlen */
8899 9, /* min_sort_char */
8900 0xFFFF, /* max_sort_char */
8901 ' ', /* pad char */
8902 false, /* escape_with_backslash_is_dangerous */
8903 1, /* levels_for_compare */
8904 &my_charset_utf16_handler, /* charset handler */
8905 &my_collation_utf16_uca_handler, /* collation handler */
8906 PAD_SPACE}; /* pad attribute */
8907
8908 CHARSET_INFO my_charset_utf16_spanish_uca_ci = { /* utf16 collation "utf16_spanish_ci": UCA collation handler with the spanish tailoring */
8909 108,
8910 0,
8911 0, /* number */
8912 MY_CS_UTF16_UCA_FLAGS, /* state */
8913 "utf16", /* csname */
8914 "utf16_spanish_ci", /* m_coll_name */
8915 "", /* comment */
8916 spanish, /* tailoring */
8917 nullptr, /* coll_param */
8918 nullptr, /* ctype */
8919 nullptr, /* to_lower */
8920 nullptr, /* to_upper */
8921 nullptr, /* sort_order */
8922 nullptr, /* uca */
8923 nullptr, /* tab_to_uni */
8924 nullptr, /* tab_from_uni */
8925 &my_unicase_default, /* caseinfo */
8926 nullptr, /* state_map */
8927 nullptr, /* ident_map */
8928 8, /* strxfrm_multiply */
8929 1, /* caseup_multiply */
8930 1, /* casedn_multiply */
8931 2, /* mbminlen */
8932 4, /* mbmaxlen */
8933 1, /* mbmaxlenlen */
8934 9, /* min_sort_char */
8935 0xFFFF, /* max_sort_char */
8936 ' ', /* pad char */
8937 false, /* escape_with_backslash_is_dangerous */
8938 1, /* levels_for_compare */
8939 &my_charset_utf16_handler, /* charset handler */
8940 &my_collation_utf16_uca_handler, /* collation handler */
8941 PAD_SPACE}; /* pad attribute */
8942
8943 CHARSET_INFO my_charset_utf16_swedish_uca_ci = { /* utf16 collation "utf16_swedish_ci": UCA collation handler with the swedish tailoring */
8944 109,
8945 0,
8946 0, /* number */
8947 MY_CS_UTF16_UCA_FLAGS, /* state */
8948 "utf16", /* csname */
8949 "utf16_swedish_ci", /* m_coll_name */
8950 "", /* comment */
8951 swedish, /* tailoring */
8952 nullptr, /* coll_param */
8953 nullptr, /* ctype */
8954 nullptr, /* to_lower */
8955 nullptr, /* to_upper */
8956 nullptr, /* sort_order */
8957 nullptr, /* uca */
8958 nullptr, /* tab_to_uni */
8959 nullptr, /* tab_from_uni */
8960 &my_unicase_default, /* caseinfo */
8961 nullptr, /* state_map */
8962 nullptr, /* ident_map */
8963 8, /* strxfrm_multiply */
8964 1, /* caseup_multiply */
8965 1, /* casedn_multiply */
8966 2, /* mbminlen */
8967 4, /* mbmaxlen */
8968 1, /* mbmaxlenlen */
8969 9, /* min_sort_char */
8970 0xFFFF, /* max_sort_char */
8971 ' ', /* pad char */
8972 false, /* escape_with_backslash_is_dangerous */
8973 1, /* levels_for_compare */
8974 &my_charset_utf16_handler, /* charset handler */
8975 &my_collation_utf16_uca_handler, /* collation handler */
8976 PAD_SPACE}; /* pad attribute */
8977
8978 CHARSET_INFO my_charset_utf16_turkish_uca_ci = { /* utf16 collation "utf16_turkish_ci": UCA collation handler, turkish tailoring, my_unicase_turkish as caseinfo */
8979 110,
8980 0,
8981 0, /* number */
8982 MY_CS_UTF16_UCA_FLAGS, /* state */
8983 "utf16", /* csname */
8984 "utf16_turkish_ci", /* m_coll_name */
8985 "", /* comment */
8986 turkish, /* tailoring */
8987 nullptr, /* coll_param */
8988 nullptr, /* ctype */
8989 nullptr, /* to_lower */
8990 nullptr, /* to_upper */
8991 nullptr, /* sort_order */
8992 nullptr, /* uca */
8993 nullptr, /* tab_to_uni */
8994 nullptr, /* tab_from_uni */
8995 &my_unicase_turkish, /* caseinfo */
8996 nullptr, /* state_map */
8997 nullptr, /* ident_map */
8998 8, /* strxfrm_multiply */
8999 1, /* caseup_multiply */
9000 1, /* casedn_multiply */
9001 2, /* mbminlen */
9002 4, /* mbmaxlen */
9003 1, /* mbmaxlenlen */
9004 9, /* min_sort_char */
9005 0xFFFF, /* max_sort_char */
9006 ' ', /* pad char */
9007 false, /* escape_with_backslash_is_dangerous */
9008 1, /* levels_for_compare */
9009 &my_charset_utf16_handler, /* charset handler */
9010 &my_collation_utf16_uca_handler, /* collation handler */
9011 PAD_SPACE}; /* pad attribute */
9012
9013 CHARSET_INFO my_charset_utf16_czech_uca_ci = { /* utf16 collation "utf16_czech_ci": UCA collation handler with the czech tailoring */
9014 111,
9015 0,
9016 0, /* number */
9017 MY_CS_UTF16_UCA_FLAGS, /* state */
9018 "utf16", /* csname */
9019 "utf16_czech_ci", /* m_coll_name */
9020 "", /* comment */
9021 czech, /* tailoring */
9022 nullptr, /* coll_param */
9023 nullptr, /* ctype */
9024 nullptr, /* to_lower */
9025 nullptr, /* to_upper */
9026 nullptr, /* sort_order */
9027 nullptr, /* uca */
9028 nullptr, /* tab_to_uni */
9029 nullptr, /* tab_from_uni */
9030 &my_unicase_default, /* caseinfo */
9031 nullptr, /* state_map */
9032 nullptr, /* ident_map */
9033 8, /* strxfrm_multiply */
9034 1, /* caseup_multiply */
9035 1, /* casedn_multiply */
9036 2, /* mbminlen */
9037 4, /* mbmaxlen */
9038 1, /* mbmaxlenlen */
9039 9, /* min_sort_char */
9040 0xFFFF, /* max_sort_char */
9041 ' ', /* pad char */
9042 false, /* escape_with_backslash_is_dangerous */
9043 1, /* levels_for_compare */
9044 &my_charset_utf16_handler, /* charset handler */
9045 &my_collation_utf16_uca_handler, /* collation handler */
9046 PAD_SPACE}; /* pad attribute */
9047
9048 CHARSET_INFO my_charset_utf16_danish_uca_ci = { /* utf16 collation "utf16_danish_ci": UCA collation handler with the danish tailoring */
9049 112,
9050 0,
9051 0, /* number */
9052 MY_CS_UTF16_UCA_FLAGS, /* state */
9053 "utf16", /* csname */
9054 "utf16_danish_ci", /* m_coll_name */
9055 "", /* comment */
9056 danish, /* tailoring */
9057 nullptr, /* coll_param */
9058 nullptr, /* ctype */
9059 nullptr, /* to_lower */
9060 nullptr, /* to_upper */
9061 nullptr, /* sort_order */
9062 nullptr, /* uca */
9063 nullptr, /* tab_to_uni */
9064 nullptr, /* tab_from_uni */
9065 &my_unicase_default, /* caseinfo */
9066 nullptr, /* state_map */
9067 nullptr, /* ident_map */
9068 8, /* strxfrm_multiply */
9069 1, /* caseup_multiply */
9070 1, /* casedn_multiply */
9071 2, /* mbminlen */
9072 4, /* mbmaxlen */
9073 1, /* mbmaxlenlen */
9074 9, /* min_sort_char */
9075 0xFFFF, /* max_sort_char */
9076 ' ', /* pad char */
9077 false, /* escape_with_backslash_is_dangerous */
9078 1, /* levels_for_compare */
9079 &my_charset_utf16_handler, /* charset handler */
9080 &my_collation_utf16_uca_handler, /* collation handler */
9081 PAD_SPACE}; /* pad attribute */
9082
9083 CHARSET_INFO my_charset_utf16_lithuanian_uca_ci = { /* utf16 collation "utf16_lithuanian_ci": UCA collation handler with the lithuanian tailoring */
9084 113,
9085 0,
9086 0, /* number */
9087 MY_CS_UTF16_UCA_FLAGS, /* state */
9088 "utf16", /* csname */
9089 "utf16_lithuanian_ci", /* m_coll_name */
9090 "", /* comment */
9091 lithuanian, /* tailoring */
9092 nullptr, /* coll_param */
9093 nullptr, /* ctype */
9094 nullptr, /* to_lower */
9095 nullptr, /* to_upper */
9096 nullptr, /* sort_order */
9097 nullptr, /* uca */
9098 nullptr, /* tab_to_uni */
9099 nullptr, /* tab_from_uni */
9100 &my_unicase_default, /* caseinfo */
9101 nullptr, /* state_map */
9102 nullptr, /* ident_map */
9103 8, /* strxfrm_multiply */
9104 1, /* caseup_multiply */
9105 1, /* casedn_multiply */
9106 2, /* mbminlen */
9107 4, /* mbmaxlen */
9108 1, /* mbmaxlenlen */
9109 9, /* min_sort_char */
9110 0xFFFF, /* max_sort_char */
9111 ' ', /* pad char */
9112 false, /* escape_with_backslash_is_dangerous */
9113 1, /* levels_for_compare */
9114 &my_charset_utf16_handler, /* charset handler */
9115 &my_collation_utf16_uca_handler, /* collation handler */
9116 PAD_SPACE}; /* pad attribute */
9117
9118 CHARSET_INFO my_charset_utf16_slovak_uca_ci = { /* utf16 collation "utf16_slovak_ci": UCA collation handler with the slovak tailoring */
9119 114,
9120 0,
9121 0, /* number */
9122 MY_CS_UTF16_UCA_FLAGS, /* state */
9123 "utf16", /* csname */
9124 "utf16_slovak_ci", /* m_coll_name */
9125 "", /* comment */
9126 slovak, /* tailoring */
9127 nullptr, /* coll_param */
9128 nullptr, /* ctype */
9129 nullptr, /* to_lower */
9130 nullptr, /* to_upper */
9131 nullptr, /* sort_order */
9132 nullptr, /* uca */
9133 nullptr, /* tab_to_uni */
9134 nullptr, /* tab_from_uni */
9135 &my_unicase_default, /* caseinfo */
9136 nullptr, /* state_map */
9137 nullptr, /* ident_map */
9138 8, /* strxfrm_multiply */
9139 1, /* caseup_multiply */
9140 1, /* casedn_multiply */
9141 2, /* mbminlen */
9142 4, /* mbmaxlen */
9143 1, /* mbmaxlenlen */
9144 9, /* min_sort_char */
9145 0xFFFF, /* max_sort_char */
9146 ' ', /* pad char */
9147 false, /* escape_with_backslash_is_dangerous */
9148 1, /* levels_for_compare */
9149 &my_charset_utf16_handler, /* charset handler */
9150 &my_collation_utf16_uca_handler, /* collation handler */
9151 PAD_SPACE}; /* pad attribute */
9152
9153 CHARSET_INFO my_charset_utf16_spanish2_uca_ci = { /* utf16 collation "utf16_spanish2_ci": UCA collation handler with the spanish2 tailoring */
9154 115,
9155 0,
9156 0, /* number */
9157 MY_CS_UTF16_UCA_FLAGS, /* state */
9158 "utf16", /* csname */
9159 "utf16_spanish2_ci", /* m_coll_name */
9160 "", /* comment */
9161 spanish2, /* tailoring */
9162 nullptr, /* coll_param */
9163 nullptr, /* ctype */
9164 nullptr, /* to_lower */
9165 nullptr, /* to_upper */
9166 nullptr, /* sort_order */
9167 nullptr, /* uca */
9168 nullptr, /* tab_to_uni */
9169 nullptr, /* tab_from_uni */
9170 &my_unicase_default, /* caseinfo */
9171 nullptr, /* state_map */
9172 nullptr, /* ident_map */
9173 8, /* strxfrm_multiply */
9174 1, /* caseup_multiply */
9175 1, /* casedn_multiply */
9176 2, /* mbminlen */
9177 4, /* mbmaxlen */
9178 1, /* mbmaxlenlen */
9179 9, /* min_sort_char */
9180 0xFFFF, /* max_sort_char */
9181 ' ', /* pad char */
9182 false, /* escape_with_backslash_is_dangerous */
9183 1, /* levels_for_compare */
9184 &my_charset_utf16_handler, /* charset handler */
9185 &my_collation_utf16_uca_handler, /* collation handler */
9186 PAD_SPACE}; /* pad attribute */
9187
9188 CHARSET_INFO my_charset_utf16_roman_uca_ci = { /* utf16 collation "utf16_roman_ci": UCA collation handler with the roman tailoring */
9189 116,
9190 0,
9191 0, /* number */
9192 MY_CS_UTF16_UCA_FLAGS, /* state */
9193 "utf16", /* csname */
9194 "utf16_roman_ci", /* m_coll_name */
9195 "", /* comment */
9196 roman, /* tailoring */
9197 nullptr, /* coll_param */
9198 nullptr, /* ctype */
9199 nullptr, /* to_lower */
9200 nullptr, /* to_upper */
9201 nullptr, /* sort_order */
9202 nullptr, /* uca */
9203 nullptr, /* tab_to_uni */
9204 nullptr, /* tab_from_uni */
9205 &my_unicase_default, /* caseinfo */
9206 nullptr, /* state_map */
9207 nullptr, /* ident_map */
9208 8, /* strxfrm_multiply */
9209 1, /* caseup_multiply */
9210 1, /* casedn_multiply */
9211 2, /* mbminlen */
9212 4, /* mbmaxlen */
9213 1, /* mbmaxlenlen */
9214 9, /* min_sort_char */
9215 0xFFFF, /* max_sort_char */
9216 ' ', /* pad char */
9217 false, /* escape_with_backslash_is_dangerous */
9218 1, /* levels_for_compare */
9219 &my_charset_utf16_handler, /* charset handler */
9220 &my_collation_utf16_uca_handler, /* collation handler */
9221 PAD_SPACE}; /* pad attribute */
9222
9223 CHARSET_INFO my_charset_utf16_persian_uca_ci = { /* utf16 collation "utf16_persian_ci": UCA collation handler with the persian tailoring */
9224 117,
9225 0,
9226 0, /* number */
9227 MY_CS_UTF16_UCA_FLAGS, /* state */
9228 "utf16", /* csname */
9229 "utf16_persian_ci", /* m_coll_name */
9230 "", /* comment */
9231 persian, /* tailoring */
9232 nullptr, /* coll_param */
9233 nullptr, /* ctype */
9234 nullptr, /* to_lower */
9235 nullptr, /* to_upper */
9236 nullptr, /* sort_order */
9237 nullptr, /* uca */
9238 nullptr, /* tab_to_uni */
9239 nullptr, /* tab_from_uni */
9240 &my_unicase_default, /* caseinfo */
9241 nullptr, /* state_map */
9242 nullptr, /* ident_map */
9243 8, /* strxfrm_multiply */
9244 1, /* caseup_multiply */
9245 1, /* casedn_multiply */
9246 2, /* mbminlen */
9247 4, /* mbmaxlen */
9248 1, /* mbmaxlenlen */
9249 9, /* min_sort_char */
9250 0xFFFF, /* max_sort_char */
9251 ' ', /* pad char */
9252 false, /* escape_with_backslash_is_dangerous */
9253 1, /* levels_for_compare */
9254 &my_charset_utf16_handler, /* charset handler */
9255 &my_collation_utf16_uca_handler, /* collation handler */
9256 PAD_SPACE}; /* pad attribute */
9257
9258 CHARSET_INFO my_charset_utf16_esperanto_uca_ci = { /* utf16 collation "utf16_esperanto_ci": UCA collation handler with the esperanto tailoring */
9259 118,
9260 0,
9261 0, /* number */
9262 MY_CS_UTF16_UCA_FLAGS, /* state */
9263 "utf16", /* csname */
9264 "utf16_esperanto_ci", /* m_coll_name */
9265 "", /* comment */
9266 esperanto, /* tailoring */
9267 nullptr, /* coll_param */
9268 nullptr, /* ctype */
9269 nullptr, /* to_lower */
9270 nullptr, /* to_upper */
9271 nullptr, /* sort_order */
9272 nullptr, /* uca */
9273 nullptr, /* tab_to_uni */
9274 nullptr, /* tab_from_uni */
9275 &my_unicase_default, /* caseinfo */
9276 nullptr, /* state_map */
9277 nullptr, /* ident_map */
9278 8, /* strxfrm_multiply */
9279 1, /* caseup_multiply */
9280 1, /* casedn_multiply */
9281 2, /* mbminlen */
9282 4, /* mbmaxlen */
9283 1, /* mbmaxlenlen */
9284 9, /* min_sort_char */
9285 0xFFFF, /* max_sort_char */
9286 ' ', /* pad char */
9287 false, /* escape_with_backslash_is_dangerous */
9288 1, /* levels_for_compare */
9289 &my_charset_utf16_handler, /* charset handler */
9290 &my_collation_utf16_uca_handler, /* collation handler */
9291 PAD_SPACE}; /* pad attribute */
9292
9293 CHARSET_INFO my_charset_utf16_hungarian_uca_ci = { /* utf16 collation "utf16_hungarian_ci": UCA collation handler with the hungarian tailoring */
9294 119,
9295 0,
9296 0, /* number */
9297 MY_CS_UTF16_UCA_FLAGS, /* state */
9298 "utf16", /* csname */
9299 "utf16_hungarian_ci", /* m_coll_name */
9300 "", /* comment */
9301 hungarian, /* tailoring */
9302 nullptr, /* coll_param */
9303 nullptr, /* ctype */
9304 nullptr, /* to_lower */
9305 nullptr, /* to_upper */
9306 nullptr, /* sort_order */
9307 nullptr, /* uca */
9308 nullptr, /* tab_to_uni */
9309 nullptr, /* tab_from_uni */
9310 &my_unicase_default, /* caseinfo */
9311 nullptr, /* state_map */
9312 nullptr, /* ident_map */
9313 8, /* strxfrm_multiply */
9314 1, /* caseup_multiply */
9315 1, /* casedn_multiply */
9316 2, /* mbminlen */
9317 4, /* mbmaxlen */
9318 1, /* mbmaxlenlen */
9319 9, /* min_sort_char */
9320 0xFFFF, /* max_sort_char */
9321 ' ', /* pad char */
9322 false, /* escape_with_backslash_is_dangerous */
9323 1, /* levels_for_compare */
9324 &my_charset_utf16_handler, /* charset handler */
9325 &my_collation_utf16_uca_handler, /* collation handler */
9326 PAD_SPACE}; /* pad attribute */
9327 /* Collation "utf16_sinhala_ci" (number 120): `sinhala` tailoring; utf16 charset handler, utf16 UCA collation handler, PAD_SPACE. */
9328 CHARSET_INFO my_charset_utf16_sinhala_uca_ci = {
9329 120, /* number */
9330 0, /* primary_number (TODO confirm) */
9331 0, /* binary_number (TODO confirm) */
9332 MY_CS_UTF16_UCA_FLAGS, /* state */
9333 "utf16", /* csname */
9334 "utf16_sinhala_ci", /* m_coll_name */
9335 "", /* comment */
9336 sinhala, /* tailoring */
9337 nullptr, /* coll_param */
9338 nullptr, /* ctype */
9339 nullptr, /* to_lower */
9340 nullptr, /* to_upper */
9341 nullptr, /* sort_order */
9342 nullptr, /* uca */
9343 nullptr, /* tab_to_uni */
9344 nullptr, /* tab_from_uni */
9345 &my_unicase_default, /* caseinfo */
9346 nullptr, /* state_map */
9347 nullptr, /* ident_map */
9348 8, /* strxfrm_multiply */
9349 1, /* caseup_multiply */
9350 1, /* casedn_multiply */
9351 2, /* mbminlen */
9352 4, /* mbmaxlen */
9353 1, /* mbmaxlenlen */
9354 9, /* min_sort_char */
9355 0xFFFF, /* max_sort_char */
9356 ' ', /* pad char */
9357 false, /* escape_with_backslash_is_dangerous */
9358 1, /* levels_for_compare */
9359 &my_charset_utf16_handler, /* charset handler */
9360 &my_collation_utf16_uca_handler, /* collation handler */
9361 PAD_SPACE}; /* pad attribute */
9362 /* Collation "utf16_german2_ci" (number 121): `german2` tailoring; utf16 charset handler, utf16 UCA collation handler, PAD_SPACE. */
9363 CHARSET_INFO my_charset_utf16_german2_uca_ci = {
9364 121, /* number */
9365 0, /* primary_number (TODO confirm) */
9366 0, /* binary_number (TODO confirm) */
9367 MY_CS_UTF16_UCA_FLAGS, /* state */
9368 "utf16", /* csname */
9369 "utf16_german2_ci", /* m_coll_name */
9370 "", /* comment */
9371 german2, /* tailoring */
9372 nullptr, /* coll_param */
9373 nullptr, /* ctype */
9374 nullptr, /* to_lower */
9375 nullptr, /* to_upper */
9376 nullptr, /* sort_order */
9377 nullptr, /* uca */
9378 nullptr, /* tab_to_uni */
9379 nullptr, /* tab_from_uni */
9380 &my_unicase_default, /* caseinfo */
9381 nullptr, /* state_map */
9382 nullptr, /* ident_map */
9383 8, /* strxfrm_multiply */
9384 1, /* caseup_multiply */
9385 1, /* casedn_multiply */
9386 2, /* mbminlen */
9387 4, /* mbmaxlen */
9388 1, /* mbmaxlenlen */
9389 9, /* min_sort_char */
9390 0xFFFF, /* max_sort_char */
9391 ' ', /* pad char */
9392 false, /* escape_with_backslash_is_dangerous */
9393 1, /* levels_for_compare */
9394 &my_charset_utf16_handler, /* charset handler */
9395 &my_collation_utf16_uca_handler, /* collation handler */
9396 PAD_SPACE}; /* pad attribute */
9397 /* Collation "utf16_croatian_ci" (number 122): `croatian` tailoring; utf16 charset handler, utf16 UCA collation handler, PAD_SPACE. */
9398 CHARSET_INFO my_charset_utf16_croatian_uca_ci = {
9399 122, /* number */
9400 0, /* primary_number (TODO confirm) */
9401 0, /* binary_number (TODO confirm) */
9402 MY_CS_UTF16_UCA_FLAGS, /* state */
9403 "utf16", /* csname */
9404 "utf16_croatian_ci", /* m_coll_name */
9405 "", /* comment */
9406 croatian, /* tailoring */
9407 nullptr, /* coll_param */
9408 nullptr, /* ctype */
9409 nullptr, /* to_lower */
9410 nullptr, /* to_upper */
9411 nullptr, /* sort_order */
9412 nullptr, /* uca */
9413 nullptr, /* tab_to_uni */
9414 nullptr, /* tab_from_uni */
9415 &my_unicase_default, /* caseinfo */
9416 nullptr, /* state_map */
9417 nullptr, /* ident_map */
9418 8, /* strxfrm_multiply */
9419 1, /* caseup_multiply */
9420 1, /* casedn_multiply */
9421 2, /* mbminlen */
9422 4, /* mbmaxlen */
9423 1, /* mbmaxlenlen */
9424 9, /* min_sort_char */
9425 0xFFFF, /* max_sort_char */
9426 ' ', /* pad char */
9427 false, /* escape_with_backslash_is_dangerous */
9428 1, /* levels_for_compare */
9429 &my_charset_utf16_handler, /* charset handler */
9430 &my_collation_utf16_uca_handler, /* collation handler */
9431 PAD_SPACE}; /* pad attribute */
9432 /* Collation "utf16_unicode_520_ci" (number 123): &my_uca_v520 and &my_unicase_unicode520 with an empty tailoring string; max_sort_char 0x10FFFF, PAD_SPACE. */
9433 CHARSET_INFO my_charset_utf16_unicode_520_ci = {
9434 123, /* number */
9435 0, /* primary_number (TODO confirm) */
9436 0, /* binary_number (TODO confirm) */
9437 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* state */
9438 "utf16", /* csname */
9439 "utf16_unicode_520_ci", /* m_coll_name */
9440 "", /* comment */
9441 "", /* tailoring */
9442 nullptr, /* coll_param */
9443 nullptr, /* ctype */
9444 nullptr, /* to_lower */
9445 nullptr, /* to_upper */
9446 nullptr, /* sort_order */
9447 &my_uca_v520, /* uca */
9448 nullptr, /* tab_to_uni */
9449 nullptr, /* tab_from_uni */
9450 &my_unicase_unicode520, /* caseinfo */
9451 nullptr, /* state_map */
9452 nullptr, /* ident_map */
9453 8, /* strxfrm_multiply */
9454 1, /* caseup_multiply */
9455 1, /* casedn_multiply */
9456 2, /* mbminlen */
9457 4, /* mbmaxlen */
9458 1, /* mbmaxlenlen */
9459 9, /* min_sort_char */
9460 0x10FFFF, /* max_sort_char */
9461 0x20, /* pad char */
9462 false, /* escape_with_backslash_is_dangerous */
9463 1, /* levels_for_compare */
9464 &my_charset_utf16_handler, /* charset handler */
9465 &my_collation_utf16_uca_handler, /* collation handler */
9466 PAD_SPACE}; /* pad attribute */
9467 /* Collation "utf16_vietnamese_ci" (number 124): `vietnamese` tailoring; utf16 charset handler, utf16 UCA collation handler, PAD_SPACE. */
9468 CHARSET_INFO my_charset_utf16_vietnamese_ci = {
9469 124, /* number */
9470 0, /* primary_number (TODO confirm) */
9471 0, /* binary_number (TODO confirm) */
9472 MY_CS_UTF16_UCA_FLAGS, /* state */
9473 "utf16", /* csname */
9474 "utf16_vietnamese_ci", /* m_coll_name */
9475 "", /* comment */
9476 vietnamese, /* tailoring */
9477 nullptr, /* coll_param */
9478 nullptr, /* ctype */
9479 nullptr, /* to_lower */
9480 nullptr, /* to_upper */
9481 nullptr, /* sort_order */
9482 nullptr, /* uca */
9483 nullptr, /* tab_to_uni */
9484 nullptr, /* tab_from_uni */
9485 &my_unicase_default, /* caseinfo */
9486 nullptr, /* state_map */
9487 nullptr, /* ident_map */
9488 8, /* strxfrm_multiply */
9489 1, /* caseup_multiply */
9490 1, /* casedn_multiply */
9491 2, /* mbminlen */
9492 4, /* mbmaxlen */
9493 1, /* mbmaxlenlen */
9494 9, /* min_sort_char */
9495 0xFFFF, /* max_sort_char */
9496 ' ', /* pad char */
9497 false, /* escape_with_backslash_is_dangerous */
9498 1, /* levels_for_compare */
9499 &my_charset_utf16_handler, /* charset handler */
9500 &my_collation_utf16_uca_handler, /* collation handler */
9501 PAD_SPACE}; /* pad attribute */
9502 /* Collation handler table used by my_charset_gb18030_unicode_520_ci. NOTE(review): slot labels other than "init" are inferred from the function names; confirm against the MY_COLLATION_HANDLER declaration. */
9503 MY_COLLATION_HANDLER my_collation_gb18030_uca_handler = {
9504 my_coll_init_uca, /* init */
9505 my_coll_uninit_uca, /* uninit (TODO confirm) */
9506 my_strnncoll_any_uca, /* strnncoll (TODO confirm) */
9507 my_strnncollsp_any_uca, /* strnncollsp (TODO confirm) */
9508 my_strnxfrm_any_uca, /* strnxfrm (TODO confirm) */
9509 my_strnxfrmlen_simple, /* strnxfrmlen (TODO confirm) */
9510 my_like_range_mb, /* like_range (TODO confirm) */
9511 my_wildcmp_uca, /* wildcmp (TODO confirm) */
9512 nullptr, /* no function supplied for this slot */
9513 my_instr_mb, /* instr (TODO confirm) */
9514 my_hash_sort_any_uca, /* hash_sort (TODO confirm) */
9515 my_propagate_complex}; /* propagate (TODO confirm) */
9516
9517 /**
9518 Per-byte "type of character" bit masks for gb18030 (257 entries).
9519 Entry 0 is reserved for EOF (-1), so the mask for byte value c is
9520 ctype_gb18030[c + 1]. See also strings/CHARSET_INFO.txt.
9521 */
9522 static const uchar ctype_gb18030[257] = {
9523 0, /* For standard library */
9524 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, /* bytes 0x00-0x0F */
9525 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, /* bytes 0x10-0x1F */
9526 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* bytes 0x20-0x2F */
9527 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, 16, /* bytes 0x30-0x3F */
9528 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* bytes 0x40-0x4F */
9529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, /* bytes 0x50-0x5F */
9530 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bytes 0x60-0x6F */
9531 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, /* bytes 0x70-0x7F */
9532 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0x80-0x8F */
9533 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0x90-0x9F */
9534 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xA0-0xAF */
9535 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xB0-0xBF */
9536 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xC0-0xCF */
9537 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xD0-0xDF */
9538 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xE0-0xEF */
9539 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0}; /* bytes 0xF0-0xFF */
9540
9541 extern MY_CHARSET_HANDLER my_charset_gb18030_uca_handler; /* declaration only; used as the charset handler below */
9542 /* Collation "gb18030_unicode_520_ci" (number 250): &my_uca_v520 with an empty tailoring string and the ctype_gb18030 table; gb18030 handlers, PAD_SPACE. */
9543 CHARSET_INFO my_charset_gb18030_unicode_520_ci = {
9544 250, /* number */
9545 0, /* primary_number (TODO confirm) */
9546 0, /* binary_number (TODO confirm) */
9547 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_NONASCII, /* state */
9548 "gb18030", /* csname */
9549 "gb18030_unicode_520_ci", /* m_coll_name */
9550 "China National Standard GB18030", /* comment */
9551 "", /* tailoring */
9552 nullptr, /* coll_param */
9553 ctype_gb18030, /* ctype */
9554 nullptr, /* to_lower */
9555 nullptr, /* to_upper */
9556 nullptr, /* sort_order */
9557 &my_uca_v520, /* uca */
9558 nullptr, /* tab_to_uni */
9559 nullptr, /* tab_from_uni */
9560 &my_unicase_unicode520, /* caseinfo */
9561 nullptr, /* state_map */
9562 nullptr, /* ident_map */
9563 8, /* strxfrm_multiply */
9564 2, /* caseup_multiply */
9565 2, /* casedn_multiply */
9566 1, /* mbminlen */
9567 4, /* mbmaxlen */
9568 2, /* mbmaxlenlen */
9569 0, /* min_sort_char */
9570 0xE3329A35, /* max_sort_char */
9571 ' ', /* pad char */
9572 false, /* escape_with_backslash_is_dangerous */
9573 1, /* levels_for_compare */
9574 &my_charset_gb18030_uca_handler, /* charset handler */
9575 &my_collation_gb18030_uca_handler, /* collation handler */
9576 PAD_SPACE}; /* pad attribute */
9577 /* Collation MY_UTF8MB4 "_0900_ai_ci" (number 255): &my_uca_v900 with no tailoring (nullptr); state additionally carries MY_CS_PRIMARY; NO_PAD. */
9578 CHARSET_INFO my_charset_utf8mb4_0900_ai_ci = {
9579 255, /* number */
9580 0, /* primary_number (TODO confirm) */
9581 0, /* binary_number (TODO confirm) */
9582 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_PRIMARY, /* state */
9583 MY_UTF8MB4, /* csname */
9584 MY_UTF8MB4 "_0900_ai_ci", /* m_coll_name */
9585 "UTF-8 Unicode", /* comment */
9586 nullptr, /* tailoring */
9587 nullptr, /* coll_param */
9588 ctype_utf8, /* ctype */
9589 nullptr, /* to_lower */
9590 nullptr, /* to_upper */
9591 nullptr, /* sort_order */
9592 &my_uca_v900, /* uca */
9593 nullptr, /* tab_to_uni */
9594 nullptr, /* tab_from_uni */
9595 &my_unicase_unicode900, /* caseinfo */
9596 nullptr, /* state_map */
9597 nullptr, /* ident_map */
9598 0, /* strxfrm_multiply */
9599 1, /* caseup_multiply */
9600 1, /* casedn_multiply */
9601 1, /* mbminlen */
9602 4, /* mbmaxlen */
9603 1, /* mbmaxlenlen */
9604 9, /* min_sort_char */
9605 0x10FFFF, /* max_sort_char */
9606 ' ', /* pad char */
9607 false, /* escape_with_backslash_is_dangerous */
9608 1, /* levels_for_compare */
9609 &my_charset_utf8mb4_handler, /* charset handler */
9610 &my_collation_uca_900_handler, /* collation handler */
9611 NO_PAD}; /* pad attribute */
9612 /* Collation MY_UTF8MB4 "_de_pb_0900_ai_ci" (number 256): &my_uca_v900 with tailoring `de_pb_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9613 CHARSET_INFO my_charset_utf8mb4_de_pb_0900_ai_ci = {
9614 256, /* number */
9615 0, /* primary_number (TODO confirm) */
9616 0, /* binary_number (TODO confirm) */
9617 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9618 MY_UTF8MB4, /* csname */
9619 MY_UTF8MB4 "_de_pb_0900_ai_ci", /* m_coll_name */
9620 "", /* comment */
9621 de_pb_cldr_30, /* tailoring */
9622 nullptr, /* coll_param */
9623 ctype_utf8, /* ctype */
9624 nullptr, /* to_lower */
9625 nullptr, /* to_upper */
9626 nullptr, /* sort_order */
9627 &my_uca_v900, /* uca */
9628 nullptr, /* tab_to_uni */
9629 nullptr, /* tab_from_uni */
9630 &my_unicase_unicode900, /* caseinfo */
9631 nullptr, /* state_map */
9632 nullptr, /* ident_map */
9633 0, /* strxfrm_multiply */
9634 1, /* caseup_multiply */
9635 1, /* casedn_multiply */
9636 1, /* mbminlen */
9637 4, /* mbmaxlen */
9638 1, /* mbmaxlenlen */
9639 9, /* min_sort_char */
9640 0x10FFFF, /* max_sort_char */
9641 ' ', /* pad char */
9642 false, /* escape_with_backslash_is_dangerous */
9643 1, /* levels_for_compare */
9644 &my_charset_utf8mb4_handler, /* charset handler */
9645 &my_collation_uca_900_handler, /* collation handler */
9646 NO_PAD}; /* pad attribute */
9647 /* Collation MY_UTF8MB4 "_is_0900_ai_ci" (number 257): &my_uca_v900 with tailoring `is_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9648 CHARSET_INFO my_charset_utf8mb4_is_0900_ai_ci = {
9649 257, /* number */
9650 0, /* primary_number (TODO confirm) */
9651 0, /* binary_number (TODO confirm) */
9652 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9653 MY_UTF8MB4, /* csname */
9654 MY_UTF8MB4 "_is_0900_ai_ci", /* m_coll_name */
9655 "", /* comment */
9656 is_cldr_30, /* tailoring */
9657 nullptr, /* coll_param */
9658 ctype_utf8, /* ctype */
9659 nullptr, /* to_lower */
9660 nullptr, /* to_upper */
9661 nullptr, /* sort_order */
9662 &my_uca_v900, /* uca */
9663 nullptr, /* tab_to_uni */
9664 nullptr, /* tab_from_uni */
9665 &my_unicase_unicode900, /* caseinfo */
9666 nullptr, /* state_map */
9667 nullptr, /* ident_map */
9668 0, /* strxfrm_multiply */
9669 1, /* caseup_multiply */
9670 1, /* casedn_multiply */
9671 1, /* mbminlen */
9672 4, /* mbmaxlen */
9673 1, /* mbmaxlenlen */
9674 9, /* min_sort_char */
9675 0x10FFFF, /* max_sort_char */
9676 ' ', /* pad char */
9677 false, /* escape_with_backslash_is_dangerous */
9678 1, /* levels_for_compare */
9679 &my_charset_utf8mb4_handler, /* charset handler */
9680 &my_collation_uca_900_handler, /* collation handler */
9681 NO_PAD}; /* pad attribute */
9682 /* Collation MY_UTF8MB4 "_lv_0900_ai_ci" (number 258): &my_uca_v900 with tailoring `lv_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9683 CHARSET_INFO my_charset_utf8mb4_lv_0900_ai_ci = {
9684 258, /* number */
9685 0, /* primary_number (TODO confirm) */
9686 0, /* binary_number (TODO confirm) */
9687 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9688 MY_UTF8MB4, /* csname */
9689 MY_UTF8MB4 "_lv_0900_ai_ci", /* m_coll_name */
9690 "", /* comment */
9691 lv_cldr_30, /* tailoring */
9692 nullptr, /* coll_param */
9693 ctype_utf8, /* ctype */
9694 nullptr, /* to_lower */
9695 nullptr, /* to_upper */
9696 nullptr, /* sort_order */
9697 &my_uca_v900, /* uca */
9698 nullptr, /* tab_to_uni */
9699 nullptr, /* tab_from_uni */
9700 &my_unicase_unicode900, /* caseinfo */
9701 nullptr, /* state_map */
9702 nullptr, /* ident_map */
9703 0, /* strxfrm_multiply */
9704 1, /* caseup_multiply */
9705 1, /* casedn_multiply */
9706 1, /* mbminlen */
9707 4, /* mbmaxlen */
9708 1, /* mbmaxlenlen */
9709 9, /* min_sort_char */
9710 0x10FFFF, /* max_sort_char */
9711 ' ', /* pad char */
9712 false, /* escape_with_backslash_is_dangerous */
9713 1, /* levels_for_compare */
9714 &my_charset_utf8mb4_handler, /* charset handler */
9715 &my_collation_uca_900_handler, /* collation handler */
9716 NO_PAD}; /* pad attribute */
9717 /* Collation MY_UTF8MB4 "_ro_0900_ai_ci" (number 259): &my_uca_v900 with tailoring `ro_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9718 CHARSET_INFO my_charset_utf8mb4_ro_0900_ai_ci = {
9719 259, /* number */
9720 0, /* primary_number (TODO confirm) */
9721 0, /* binary_number (TODO confirm) */
9722 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9723 MY_UTF8MB4, /* csname */
9724 MY_UTF8MB4 "_ro_0900_ai_ci", /* m_coll_name */
9725 "", /* comment */
9726 ro_cldr_30, /* tailoring */
9727 nullptr, /* coll_param */
9728 ctype_utf8, /* ctype */
9729 nullptr, /* to_lower */
9730 nullptr, /* to_upper */
9731 nullptr, /* sort_order */
9732 &my_uca_v900, /* uca */
9733 nullptr, /* tab_to_uni */
9734 nullptr, /* tab_from_uni */
9735 &my_unicase_unicode900, /* caseinfo */
9736 nullptr, /* state_map */
9737 nullptr, /* ident_map */
9738 0, /* strxfrm_multiply */
9739 1, /* caseup_multiply */
9740 1, /* casedn_multiply */
9741 1, /* mbminlen */
9742 4, /* mbmaxlen */
9743 1, /* mbmaxlenlen */
9744 9, /* min_sort_char */
9745 0x10FFFF, /* max_sort_char */
9746 ' ', /* pad char */
9747 false, /* escape_with_backslash_is_dangerous */
9748 1, /* levels_for_compare */
9749 &my_charset_utf8mb4_handler, /* charset handler */
9750 &my_collation_uca_900_handler, /* collation handler */
9751 NO_PAD}; /* pad attribute */
9752 /* Collation MY_UTF8MB4 "_sl_0900_ai_ci" (number 260): &my_uca_v900 with tailoring `sl_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9753 CHARSET_INFO my_charset_utf8mb4_sl_0900_ai_ci = {
9754 260, /* number */
9755 0, /* primary_number (TODO confirm) */
9756 0, /* binary_number (TODO confirm) */
9757 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9758 MY_UTF8MB4, /* csname */
9759 MY_UTF8MB4 "_sl_0900_ai_ci", /* m_coll_name */
9760 "", /* comment */
9761 sl_cldr_30, /* tailoring */
9762 nullptr, /* coll_param */
9763 ctype_utf8, /* ctype */
9764 nullptr, /* to_lower */
9765 nullptr, /* to_upper */
9766 nullptr, /* sort_order */
9767 &my_uca_v900, /* uca */
9768 nullptr, /* tab_to_uni */
9769 nullptr, /* tab_from_uni */
9770 &my_unicase_unicode900, /* caseinfo */
9771 nullptr, /* state_map */
9772 nullptr, /* ident_map */
9773 0, /* strxfrm_multiply */
9774 1, /* caseup_multiply */
9775 1, /* casedn_multiply */
9776 1, /* mbminlen */
9777 4, /* mbmaxlen */
9778 1, /* mbmaxlenlen */
9779 9, /* min_sort_char */
9780 0x10FFFF, /* max_sort_char */
9781 ' ', /* pad char */
9782 false, /* escape_with_backslash_is_dangerous */
9783 1, /* levels_for_compare */
9784 &my_charset_utf8mb4_handler, /* charset handler */
9785 &my_collation_uca_900_handler, /* collation handler */
9786 NO_PAD}; /* pad attribute */
9787 /* Collation MY_UTF8MB4 "_pl_0900_ai_ci" (number 261): &my_uca_v900 with tailoring `pl_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9788 CHARSET_INFO my_charset_utf8mb4_pl_0900_ai_ci = {
9789 261, /* number */
9790 0, /* primary_number (TODO confirm) */
9791 0, /* binary_number (TODO confirm) */
9792 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9793 MY_UTF8MB4, /* csname */
9794 MY_UTF8MB4 "_pl_0900_ai_ci", /* m_coll_name */
9795 "", /* comment */
9796 pl_cldr_30, /* tailoring */
9797 nullptr, /* coll_param */
9798 ctype_utf8, /* ctype */
9799 nullptr, /* to_lower */
9800 nullptr, /* to_upper */
9801 nullptr, /* sort_order */
9802 &my_uca_v900, /* uca */
9803 nullptr, /* tab_to_uni */
9804 nullptr, /* tab_from_uni */
9805 &my_unicase_unicode900, /* caseinfo */
9806 nullptr, /* state_map */
9807 nullptr, /* ident_map */
9808 0, /* strxfrm_multiply */
9809 1, /* caseup_multiply */
9810 1, /* casedn_multiply */
9811 1, /* mbminlen */
9812 4, /* mbmaxlen */
9813 1, /* mbmaxlenlen */
9814 9, /* min_sort_char */
9815 0x10FFFF, /* max_sort_char */
9816 ' ', /* pad char */
9817 false, /* escape_with_backslash_is_dangerous */
9818 1, /* levels_for_compare */
9819 &my_charset_utf8mb4_handler, /* charset handler */
9820 &my_collation_uca_900_handler, /* collation handler */
9821 NO_PAD}; /* pad attribute */
9822 /* Collation MY_UTF8MB4 "_et_0900_ai_ci" (number 262): &my_uca_v900 with tailoring `et_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9823 CHARSET_INFO my_charset_utf8mb4_et_0900_ai_ci = {
9824 262, /* number */
9825 0, /* primary_number (TODO confirm) */
9826 0, /* binary_number (TODO confirm) */
9827 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9828 MY_UTF8MB4, /* csname */
9829 MY_UTF8MB4 "_et_0900_ai_ci", /* m_coll_name */
9830 "", /* comment */
9831 et_cldr_30, /* tailoring */
9832 nullptr, /* coll_param */
9833 ctype_utf8, /* ctype */
9834 nullptr, /* to_lower */
9835 nullptr, /* to_upper */
9836 nullptr, /* sort_order */
9837 &my_uca_v900, /* uca */
9838 nullptr, /* tab_to_uni */
9839 nullptr, /* tab_from_uni */
9840 &my_unicase_unicode900, /* caseinfo */
9841 nullptr, /* state_map */
9842 nullptr, /* ident_map */
9843 0, /* strxfrm_multiply */
9844 1, /* caseup_multiply */
9845 1, /* casedn_multiply */
9846 1, /* mbminlen */
9847 4, /* mbmaxlen */
9848 1, /* mbmaxlenlen */
9849 9, /* min_sort_char */
9850 0x10FFFF, /* max_sort_char */
9851 ' ', /* pad char */
9852 false, /* escape_with_backslash_is_dangerous */
9853 1, /* levels_for_compare */
9854 &my_charset_utf8mb4_handler, /* charset handler */
9855 &my_collation_uca_900_handler, /* collation handler */
9856 NO_PAD}; /* pad attribute */
9857 /* Collation MY_UTF8MB4 "_es_0900_ai_ci" (number 263): &my_uca_v900 with tailoring `spanish` (not a *_cldr_30 symbol); utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9858 CHARSET_INFO my_charset_utf8mb4_es_0900_ai_ci = {
9859 263, /* number */
9860 0, /* primary_number (TODO confirm) */
9861 0, /* binary_number (TODO confirm) */
9862 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9863 MY_UTF8MB4, /* csname */
9864 MY_UTF8MB4 "_es_0900_ai_ci", /* m_coll_name */
9865 "", /* comment */
9866 spanish, /* tailoring */
9867 nullptr, /* coll_param */
9868 ctype_utf8, /* ctype */
9869 nullptr, /* to_lower */
9870 nullptr, /* to_upper */
9871 nullptr, /* sort_order */
9872 &my_uca_v900, /* uca */
9873 nullptr, /* tab_to_uni */
9874 nullptr, /* tab_from_uni */
9875 &my_unicase_unicode900, /* caseinfo */
9876 nullptr, /* state_map */
9877 nullptr, /* ident_map */
9878 0, /* strxfrm_multiply */
9879 1, /* caseup_multiply */
9880 1, /* casedn_multiply */
9881 1, /* mbminlen */
9882 4, /* mbmaxlen */
9883 1, /* mbmaxlenlen */
9884 9, /* min_sort_char */
9885 0x10FFFF, /* max_sort_char */
9886 ' ', /* pad char */
9887 false, /* escape_with_backslash_is_dangerous */
9888 1, /* levels_for_compare */
9889 &my_charset_utf8mb4_handler, /* charset handler */
9890 &my_collation_uca_900_handler, /* collation handler */
9891 NO_PAD}; /* pad attribute */
9892 /* Collation MY_UTF8MB4 "_sv_0900_ai_ci" (number 264): &my_uca_v900 with tailoring `sv_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9893 CHARSET_INFO my_charset_utf8mb4_sv_0900_ai_ci = {
9894 264, /* number */
9895 0, /* primary_number (TODO confirm) */
9896 0, /* binary_number (TODO confirm) */
9897 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9898 MY_UTF8MB4, /* csname */
9899 MY_UTF8MB4 "_sv_0900_ai_ci", /* m_coll_name */
9900 "", /* comment */
9901 sv_cldr_30, /* tailoring */
9902 nullptr, /* coll_param */
9903 ctype_utf8, /* ctype */
9904 nullptr, /* to_lower */
9905 nullptr, /* to_upper */
9906 nullptr, /* sort_order */
9907 &my_uca_v900, /* uca */
9908 nullptr, /* tab_to_uni */
9909 nullptr, /* tab_from_uni */
9910 &my_unicase_unicode900, /* caseinfo */
9911 nullptr, /* state_map */
9912 nullptr, /* ident_map */
9913 0, /* strxfrm_multiply */
9914 1, /* caseup_multiply */
9915 1, /* casedn_multiply */
9916 1, /* mbminlen */
9917 4, /* mbmaxlen */
9918 1, /* mbmaxlenlen */
9919 9, /* min_sort_char */
9920 0x10FFFF, /* max_sort_char */
9921 ' ', /* pad char */
9922 false, /* escape_with_backslash_is_dangerous */
9923 1, /* levels_for_compare */
9924 &my_charset_utf8mb4_handler, /* charset handler */
9925 &my_collation_uca_900_handler, /* collation handler */
9926 NO_PAD}; /* pad attribute */
9927 /* Collation MY_UTF8MB4 "_tr_0900_ai_ci" (number 265): &my_uca_v900 with tailoring `tr_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9928 CHARSET_INFO my_charset_utf8mb4_tr_0900_ai_ci = {
9929 265, /* number */
9930 0, /* primary_number (TODO confirm) */
9931 0, /* binary_number (TODO confirm) */
9932 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9933 MY_UTF8MB4, /* csname */
9934 MY_UTF8MB4 "_tr_0900_ai_ci", /* m_coll_name */
9935 "", /* comment */
9936 tr_cldr_30, /* tailoring */
9937 nullptr, /* coll_param */
9938 ctype_utf8, /* ctype */
9939 nullptr, /* to_lower */
9940 nullptr, /* to_upper */
9941 nullptr, /* sort_order */
9942 &my_uca_v900, /* uca */
9943 nullptr, /* tab_to_uni */
9944 nullptr, /* tab_from_uni */
9945 &my_unicase_unicode900, /* caseinfo */
9946 nullptr, /* state_map */
9947 nullptr, /* ident_map */
9948 0, /* strxfrm_multiply */
9949 1, /* caseup_multiply */
9950 1, /* casedn_multiply */
9951 1, /* mbminlen */
9952 4, /* mbmaxlen */
9953 1, /* mbmaxlenlen */
9954 9, /* min_sort_char */
9955 0x10FFFF, /* max_sort_char */
9956 ' ', /* pad char */
9957 false, /* escape_with_backslash_is_dangerous */
9958 1, /* levels_for_compare */
9959 &my_charset_utf8mb4_handler, /* charset handler */
9960 &my_collation_uca_900_handler, /* collation handler */
9961 NO_PAD}; /* pad attribute */
9962 /* Collation MY_UTF8MB4 "_cs_0900_ai_ci" (number 266): &my_uca_v900 with tailoring `cs_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9963 CHARSET_INFO my_charset_utf8mb4_cs_0900_ai_ci = {
9964 266, /* number */
9965 0, /* primary_number (TODO confirm) */
9966 0, /* binary_number (TODO confirm) */
9967 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9968 MY_UTF8MB4, /* csname */
9969 MY_UTF8MB4 "_cs_0900_ai_ci", /* m_coll_name */
9970 "", /* comment */
9971 cs_cldr_30, /* tailoring */
9972 nullptr, /* coll_param */
9973 ctype_utf8, /* ctype */
9974 nullptr, /* to_lower */
9975 nullptr, /* to_upper */
9976 nullptr, /* sort_order */
9977 &my_uca_v900, /* uca */
9978 nullptr, /* tab_to_uni */
9979 nullptr, /* tab_from_uni */
9980 &my_unicase_unicode900, /* caseinfo */
9981 nullptr, /* state_map */
9982 nullptr, /* ident_map */
9983 0, /* strxfrm_multiply */
9984 1, /* caseup_multiply */
9985 1, /* casedn_multiply */
9986 1, /* mbminlen */
9987 4, /* mbmaxlen */
9988 1, /* mbmaxlenlen */
9989 9, /* min_sort_char */
9990 0x10FFFF, /* max_sort_char */
9991 ' ', /* pad char */
9992 false, /* escape_with_backslash_is_dangerous */
9993 1, /* levels_for_compare */
9994 &my_charset_utf8mb4_handler, /* charset handler */
9995 &my_collation_uca_900_handler, /* collation handler */
9996 NO_PAD}; /* pad attribute */
9997 /* Collation MY_UTF8MB4 "_da_0900_ai_ci" (number 267): &my_uca_v900 with tailoring `da_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
9998 CHARSET_INFO my_charset_utf8mb4_da_0900_ai_ci = {
9999 267, /* number */
10000 0, /* primary_number (TODO confirm) */
10001 0, /* binary_number (TODO confirm) */
10002 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10003 MY_UTF8MB4, /* csname */
10004 MY_UTF8MB4 "_da_0900_ai_ci", /* m_coll_name */
10005 "", /* comment */
10006 da_cldr_30, /* tailoring */
10007 nullptr, /* coll_param */
10008 ctype_utf8, /* ctype */
10009 nullptr, /* to_lower */
10010 nullptr, /* to_upper */
10011 nullptr, /* sort_order */
10012 &my_uca_v900, /* uca */
10013 nullptr, /* tab_to_uni */
10014 nullptr, /* tab_from_uni */
10015 &my_unicase_unicode900, /* caseinfo */
10016 nullptr, /* state_map */
10017 nullptr, /* ident_map */
10018 0, /* strxfrm_multiply */
10019 1, /* caseup_multiply */
10020 1, /* casedn_multiply */
10021 1, /* mbminlen */
10022 4, /* mbmaxlen */
10023 1, /* mbmaxlenlen */
10024 9, /* min_sort_char */
10025 0x10FFFF, /* max_sort_char */
10026 ' ', /* pad char */
10027 false, /* escape_with_backslash_is_dangerous */
10028 1, /* levels_for_compare */
10029 &my_charset_utf8mb4_handler, /* charset handler */
10030 &my_collation_uca_900_handler, /* collation handler */
10031 NO_PAD}; /* pad attribute */
10032 /* Collation MY_UTF8MB4 "_lt_0900_ai_ci" (number 268): &my_uca_v900 with tailoring `lt_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
10033 CHARSET_INFO my_charset_utf8mb4_lt_0900_ai_ci = {
10034 268, /* number */
10035 0, /* primary_number (TODO confirm) */
10036 0, /* binary_number (TODO confirm) */
10037 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10038 MY_UTF8MB4, /* csname */
10039 MY_UTF8MB4 "_lt_0900_ai_ci", /* m_coll_name */
10040 "", /* comment */
10041 lt_cldr_30, /* tailoring */
10042 nullptr, /* coll_param */
10043 ctype_utf8, /* ctype */
10044 nullptr, /* to_lower */
10045 nullptr, /* to_upper */
10046 nullptr, /* sort_order */
10047 &my_uca_v900, /* uca */
10048 nullptr, /* tab_to_uni */
10049 nullptr, /* tab_from_uni */
10050 &my_unicase_unicode900, /* caseinfo */
10051 nullptr, /* state_map */
10052 nullptr, /* ident_map */
10053 0, /* strxfrm_multiply */
10054 1, /* caseup_multiply */
10055 1, /* casedn_multiply */
10056 1, /* mbminlen */
10057 4, /* mbmaxlen */
10058 1, /* mbmaxlenlen */
10059 9, /* min_sort_char */
10060 0x10FFFF, /* max_sort_char */
10061 ' ', /* pad char */
10062 false, /* escape_with_backslash_is_dangerous */
10063 1, /* levels_for_compare */
10064 &my_charset_utf8mb4_handler, /* charset handler */
10065 &my_collation_uca_900_handler, /* collation handler */
10066 NO_PAD}; /* pad attribute */
10067 /* Collation MY_UTF8MB4 "_sk_0900_ai_ci" (number 269): &my_uca_v900 with tailoring `sk_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
10068 CHARSET_INFO my_charset_utf8mb4_sk_0900_ai_ci = {
10069 269, /* number */
10070 0, /* primary_number (TODO confirm) */
10071 0, /* binary_number (TODO confirm) */
10072 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10073 MY_UTF8MB4, /* csname */
10074 MY_UTF8MB4 "_sk_0900_ai_ci", /* m_coll_name */
10075 "", /* comment */
10076 sk_cldr_30, /* tailoring */
10077 nullptr, /* coll_param */
10078 ctype_utf8, /* ctype */
10079 nullptr, /* to_lower */
10080 nullptr, /* to_upper */
10081 nullptr, /* sort_order */
10082 &my_uca_v900, /* uca */
10083 nullptr, /* tab_to_uni */
10084 nullptr, /* tab_from_uni */
10085 &my_unicase_unicode900, /* caseinfo */
10086 nullptr, /* state_map */
10087 nullptr, /* ident_map */
10088 0, /* strxfrm_multiply */
10089 1, /* caseup_multiply */
10090 1, /* casedn_multiply */
10091 1, /* mbminlen */
10092 4, /* mbmaxlen */
10093 1, /* mbmaxlenlen */
10094 9, /* min_sort_char */
10095 0x10FFFF, /* max_sort_char */
10096 ' ', /* pad char */
10097 false, /* escape_with_backslash_is_dangerous */
10098 1, /* levels_for_compare */
10099 &my_charset_utf8mb4_handler, /* charset handler */
10100 &my_collation_uca_900_handler, /* collation handler */
10101 NO_PAD}; /* pad attribute */
10102 /* Collation MY_UTF8MB4 "_es_trad_0900_ai_ci" (number 270): &my_uca_v900 with tailoring `es_trad_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
10103 CHARSET_INFO my_charset_utf8mb4_es_trad_0900_ai_ci = {
10104 270, /* number */
10105 0, /* primary_number (TODO confirm) */
10106 0, /* binary_number (TODO confirm) */
10107 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10108 MY_UTF8MB4, /* csname */
10109 MY_UTF8MB4 "_es_trad_0900_ai_ci", /* m_coll_name */
10110 "", /* comment */
10111 es_trad_cldr_30, /* tailoring */
10112 nullptr, /* coll_param */
10113 ctype_utf8, /* ctype */
10114 nullptr, /* to_lower */
10115 nullptr, /* to_upper */
10116 nullptr, /* sort_order */
10117 &my_uca_v900, /* uca */
10118 nullptr, /* tab_to_uni */
10119 nullptr, /* tab_from_uni */
10120 &my_unicase_unicode900, /* caseinfo */
10121 nullptr, /* state_map */
10122 nullptr, /* ident_map */
10123 0, /* strxfrm_multiply */
10124 1, /* caseup_multiply */
10125 1, /* casedn_multiply */
10126 1, /* mbminlen */
10127 4, /* mbmaxlen */
10128 1, /* mbmaxlenlen */
10129 9, /* min_sort_char */
10130 0x10FFFF, /* max_sort_char */
10131 ' ', /* pad char */
10132 false, /* escape_with_backslash_is_dangerous */
10133 1, /* levels_for_compare */
10134 &my_charset_utf8mb4_handler, /* charset handler */
10135 &my_collation_uca_900_handler, /* collation handler */
10136 NO_PAD}; /* pad attribute */
10137 /* Collation MY_UTF8MB4 "_la_0900_ai_ci" (number 271): &my_uca_v900 with tailoring `roman` (not a *_cldr_30 symbol); utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
10138 CHARSET_INFO my_charset_utf8mb4_la_0900_ai_ci = {
10139 271, /* number */
10140 0, /* primary_number (TODO confirm) */
10141 0, /* binary_number (TODO confirm) */
10142 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10143 MY_UTF8MB4, /* csname */
10144 MY_UTF8MB4 "_la_0900_ai_ci", /* m_coll_name */
10145 "", /* comment */
10146 roman, /* tailoring */
10147 nullptr, /* coll_param */
10148 ctype_utf8, /* ctype */
10149 nullptr, /* to_lower */
10150 nullptr, /* to_upper */
10151 nullptr, /* sort_order */
10152 &my_uca_v900, /* uca */
10153 nullptr, /* tab_to_uni */
10154 nullptr, /* tab_from_uni */
10155 &my_unicase_unicode900, /* caseinfo */
10156 nullptr, /* state_map */
10157 nullptr, /* ident_map */
10158 0, /* strxfrm_multiply */
10159 1, /* caseup_multiply */
10160 1, /* casedn_multiply */
10161 1, /* mbminlen */
10162 4, /* mbmaxlen */
10163 1, /* mbmaxlenlen */
10164 9, /* min_sort_char */
10165 0x10FFFF, /* max_sort_char */
10166 ' ', /* pad char */
10167 false, /* escape_with_backslash_is_dangerous */
10168 1, /* levels_for_compare */
10169 &my_charset_utf8mb4_handler, /* charset handler */
10170 &my_collation_uca_900_handler, /* collation handler */
10171 NO_PAD}; /* pad attribute */
10172 /* Disabled by the #if 0 below: collation MY_UTF8MB4 "_fa_0900_ai_ci" (number 272), tailoring `fa_cldr_30` with &fa_coll_param. Not compiled. */
10173 #if 0
10174 CHARSET_INFO my_charset_utf8mb4_fa_0900_ai_ci=
10175 {
10176 272, 0, 0, /* number, then primary_number and binary_number (TODO confirm) */
10177 MY_CS_UTF8MB4_UCA_FLAGS,/* state */
10178 MY_UTF8MB4, /* csname */
10179 MY_UTF8MB4 "_fa_0900_ai_ci",/* m_coll_name */
10180 "", /* comment */
10181 fa_cldr_30, /* tailoring */
10182 &fa_coll_param, /* coll_param */
10183 ctype_utf8, /* ctype */
10184 NULL, /* to_lower */
10185 NULL, /* to_upper */
10186 NULL, /* sort_order */
10187 &my_uca_v900, /* uca */
10188 NULL, /* tab_to_uni */
10189 NULL, /* tab_from_uni */
10190 &my_unicase_unicode900,/* caseinfo */
10191 NULL, /* state_map */
10192 NULL, /* ident_map */
10193 0, /* strxfrm_multiply */
10194 1, /* caseup_multiply */
10195 1, /* casedn_multiply */
10196 1, /* mbminlen */
10197 4, /* mbmaxlen */
10198 1, /* mbmaxlenlen */
10199 9, /* min_sort_char */
10200 0x10FFFF, /* max_sort_char */
10201 ' ', /* pad char */
10202 0, /* escape_with_backslash_is_dangerous */
10203 1, /* levels_for_compare */
10204 &my_charset_utf8mb4_handler, /* charset handler */
10205 &my_collation_uca_900_handler, /* collation handler */
10206 NO_PAD /* pad attribute */
10207 };
10208 #endif
10209 /* Collation MY_UTF8MB4 "_eo_0900_ai_ci" (number 273): &my_uca_v900 with tailoring `esperanto` (the symbol my_charset_utf16_esperanto_uca_ci also uses); NO_PAD. */
10210 CHARSET_INFO my_charset_utf8mb4_eo_0900_ai_ci = {
10211 273, /* number */
10212 0, /* primary_number (TODO confirm) */
10213 0, /* binary_number (TODO confirm) */
10214 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10215 MY_UTF8MB4, /* csname */
10216 MY_UTF8MB4 "_eo_0900_ai_ci", /* m_coll_name */
10217 "", /* comment */
10218 esperanto, /* tailoring */
10219 nullptr, /* coll_param */
10220 ctype_utf8, /* ctype */
10221 nullptr, /* to_lower */
10222 nullptr, /* to_upper */
10223 nullptr, /* sort_order */
10224 &my_uca_v900, /* uca */
10225 nullptr, /* tab_to_uni */
10226 nullptr, /* tab_from_uni */
10227 &my_unicase_unicode900, /* caseinfo */
10228 nullptr, /* state_map */
10229 nullptr, /* ident_map */
10230 0, /* strxfrm_multiply */
10231 1, /* caseup_multiply */
10232 1, /* casedn_multiply */
10233 1, /* mbminlen */
10234 4, /* mbmaxlen */
10235 1, /* mbmaxlenlen */
10236 9, /* min_sort_char */
10237 0x10FFFF, /* max_sort_char */
10238 ' ', /* pad char */
10239 false, /* escape_with_backslash_is_dangerous */
10240 1, /* levels_for_compare */
10241 &my_charset_utf8mb4_handler, /* charset handler */
10242 &my_collation_uca_900_handler, /* collation handler */
10243 NO_PAD}; /* pad attribute */
10244 /* Collation MY_UTF8MB4 "_hu_0900_ai_ci" (number 274): &my_uca_v900 with tailoring `hu_cldr_30`; utf8mb4 charset handler, uca_900 collation handler, NO_PAD. */
10245 CHARSET_INFO my_charset_utf8mb4_hu_0900_ai_ci = {
10246 274, /* number */
10247 0, /* primary_number (TODO confirm) */
10248 0, /* binary_number (TODO confirm) */
10249 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10250 MY_UTF8MB4, /* csname */
10251 MY_UTF8MB4 "_hu_0900_ai_ci", /* m_coll_name */
10252 "", /* comment */
10253 hu_cldr_30, /* tailoring */
10254 nullptr, /* coll_param */
10255 ctype_utf8, /* ctype */
10256 nullptr, /* to_lower */
10257 nullptr, /* to_upper */
10258 nullptr, /* sort_order */
10259 &my_uca_v900, /* uca */
10260 nullptr, /* tab_to_uni */
10261 nullptr, /* tab_from_uni */
10262 &my_unicase_unicode900, /* caseinfo */
10263 nullptr, /* state_map */
10264 nullptr, /* ident_map */
10265 0, /* strxfrm_multiply */
10266 1, /* caseup_multiply */
10267 1, /* casedn_multiply */
10268 1, /* mbminlen */
10269 4, /* mbmaxlen */
10270 1, /* mbmaxlenlen */
10271 9, /* min_sort_char */
10272 0x10FFFF, /* max_sort_char */
10273 ' ', /* pad char */
10274 false, /* escape_with_backslash_is_dangerous */
10275 1, /* levels_for_compare */
10276 &my_charset_utf8mb4_handler, /* charset handler */
10277 &my_collation_uca_900_handler, /* collation handler */
10278 NO_PAD}; /* pad attribute */
10279 /* Collation MY_UTF8MB4 "_hr_0900_ai_ci" (number 275): &my_uca_v900 with tailoring `hr_cldr_30` and a non-null coll_param (&hr_coll_param); NO_PAD. */
10280 CHARSET_INFO my_charset_utf8mb4_hr_0900_ai_ci = {
10281 275, /* number */
10282 0, /* primary_number (TODO confirm) */
10283 0, /* binary_number (TODO confirm) */
10284 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10285 MY_UTF8MB4, /* csname */
10286 MY_UTF8MB4 "_hr_0900_ai_ci", /* m_coll_name */
10287 "", /* comment */
10288 hr_cldr_30, /* tailoring */
10289 &hr_coll_param, /* coll_param */
10290 ctype_utf8, /* ctype */
10291 nullptr, /* to_lower */
10292 nullptr, /* to_upper */
10293 nullptr, /* sort_order */
10294 &my_uca_v900, /* uca */
10295 nullptr, /* tab_to_uni */
10296 nullptr, /* tab_from_uni */
10297 &my_unicase_unicode900, /* caseinfo */
10298 nullptr, /* state_map */
10299 nullptr, /* ident_map */
10300 0, /* strxfrm_multiply */
10301 1, /* caseup_multiply */
10302 1, /* casedn_multiply */
10303 1, /* mbminlen */
10304 4, /* mbmaxlen */
10305 1, /* mbmaxlenlen */
10306 9, /* min_sort_char */
10307 0x10FFFF, /* max_sort_char */
10308 ' ', /* pad char */
10309 false, /* escape_with_backslash_is_dangerous */
10310 1, /* levels_for_compare */
10311 &my_charset_utf8mb4_handler, /* charset handler */
10312 &my_collation_uca_900_handler, /* collation handler */
10313 NO_PAD}; /* pad attribute */
10314 /* Disabled by the #if 0 below: collation MY_UTF8MB4 "_si_0900_ai_ci" (number 276), tailoring `si_cldr_30`. Not compiled. */
10315 #if 0
10316 CHARSET_INFO my_charset_utf8mb4_si_0900_ai_ci=
10317 {
10318 276, 0, 0, /* number, then primary_number and binary_number (TODO confirm) */
10319 MY_CS_UTF8MB4_UCA_FLAGS,/* state */
10320 MY_UTF8MB4, /* csname */
10321 MY_UTF8MB4 "_si_0900_ai_ci",/* m_coll_name */
10322 "", /* comment */
10323 si_cldr_30, /* tailoring */
10324 NULL, /* coll_param */
10325 ctype_utf8, /* ctype */
10326 NULL, /* to_lower */
10327 NULL, /* to_upper */
10328 NULL, /* sort_order */
10329 &my_uca_v900, /* uca */
10330 NULL, /* tab_to_uni */
10331 NULL, /* tab_from_uni */
10332 &my_unicase_unicode900,/* caseinfo */
10333 NULL, /* state_map */
10334 NULL, /* ident_map */
10335 0, /* strxfrm_multiply */
10336 1, /* caseup_multiply */
10337 1, /* casedn_multiply */
10338 1, /* mbminlen */
10339 4, /* mbmaxlen */
10340 1, /* mbmaxlenlen */
10341 9, /* min_sort_char */
10342 0x10FFFF, /* max_sort_char */
10343 ' ', /* pad char */
10344 0, /* escape_with_backslash_is_dangerous */
10345 1, /* levels_for_compare */
10346 &my_charset_utf8mb4_handler, /* charset handler */
10347 &my_collation_uca_900_handler, /* collation handler */
10348 NO_PAD /* pad attribute */
10349 };
10350 #endif
10351
/*
  CHARSET_INFO for MY_UTF8MB4 "_vi_0900_ai_ci" (number 277).
  Tailoring: vi_cldr_30; levels_for_compare is 1; NO_PAD.
  coll_param points at vi_coll_param, the same parameters that the
  "_vi_0900_as_cs" definition (number 300) passes together with this
  tailoring, so that both Vietnamese variants share one set of collation
  parameters (as the hr and ru ai_ci/as_cs pairs do).
*/
10352 CHARSET_INFO my_charset_utf8mb4_vi_0900_ai_ci = {
10353 277,
10354 0,
10355 0, /* number */
10356 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10357 MY_UTF8MB4, /* csname */
10358 MY_UTF8MB4 "_vi_0900_ai_ci", /* m_coll_name */
10359 "", /* comment */
10360 vi_cldr_30, /* tailoring */
10361 &vi_coll_param, /* coll_param */
10362 ctype_utf8, /* ctype */
10363 nullptr, /* to_lower */
10364 nullptr, /* to_upper */
10365 nullptr, /* sort_order */
10366 &my_uca_v900, /* uca */
10367 nullptr, /* tab_to_uni */
10368 nullptr, /* tab_from_uni */
10369 &my_unicase_unicode900, /* caseinfo */
10370 nullptr, /* state_map */
10371 nullptr, /* ident_map */
10372 0, /* strxfrm_multiply */
10373 1, /* caseup_multiply */
10374 1, /* casedn_multiply */
10375 1, /* mbminlen */
10376 4, /* mbmaxlen */
10377 1, /* mbmaxlenlen */
10378 9, /* min_sort_char */
10379 0x10FFFF, /* max_sort_char */
10380 ' ', /* pad char */
10381 false, /* escape_with_backslash_is_dangerous */
10382 1, /* levels_for_compare */
10383 &my_charset_utf8mb4_handler,
10384 &my_collation_uca_900_handler,
10385 NO_PAD};
10386
/*
  CHARSET_INFO for MY_UTF8MB4 "_0900_as_cs" (number 278).
  No tailoring and no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10387 CHARSET_INFO my_charset_utf8mb4_0900_as_cs = {
10388 278,
10389 0,
10390 0, /* number */
10391 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10392 MY_UTF8MB4, /* csname */
10393 MY_UTF8MB4 "_0900_as_cs", /* m_coll_name */
10394 "", /* comment */
10395 nullptr, /* tailoring */
10396 nullptr, /* coll_param */
10397 ctype_utf8, /* ctype */
10398 nullptr, /* to_lower */
10399 nullptr, /* to_upper */
10400 nullptr, /* sort_order */
10401 &my_uca_v900, /* uca */
10402 nullptr, /* tab_to_uni */
10403 nullptr, /* tab_from_uni */
10404 &my_unicase_unicode900, /* caseinfo */
10405 nullptr, /* state_map */
10406 nullptr, /* ident_map */
10407 0, /* strxfrm_multiply */
10408 1, /* caseup_multiply */
10409 1, /* casedn_multiply */
10410 1, /* mbminlen */
10411 4, /* mbmaxlen */
10412 1, /* mbmaxlenlen */
10413 9, /* min_sort_char */
10414 0x10FFFF, /* max_sort_char */
10415 ' ', /* pad char */
10416 false, /* escape_with_backslash_is_dangerous */
10417 3, /* levels_for_compare */
10418 &my_charset_utf8mb4_handler,
10419 &my_collation_uca_900_handler,
10420 NO_PAD};
10421
/*
  CHARSET_INFO for MY_UTF8MB4 "_de_pb_0900_as_cs" (number 279).
  Tailoring: de_pb_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10422 CHARSET_INFO my_charset_utf8mb4_de_pb_0900_as_cs = {
10423 279,
10424 0,
10425 0, /* number */
10426 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10427 MY_UTF8MB4, /* csname */
10428 MY_UTF8MB4 "_de_pb_0900_as_cs", /* m_coll_name */
10429 "", /* comment */
10430 de_pb_cldr_30, /* tailoring */
10431 nullptr, /* coll_param */
10432 ctype_utf8, /* ctype */
10433 nullptr, /* to_lower */
10434 nullptr, /* to_upper */
10435 nullptr, /* sort_order */
10436 &my_uca_v900, /* uca */
10437 nullptr, /* tab_to_uni */
10438 nullptr, /* tab_from_uni */
10439 &my_unicase_unicode900, /* caseinfo */
10440 nullptr, /* state_map */
10441 nullptr, /* ident_map */
10442 0, /* strxfrm_multiply */
10443 1, /* caseup_multiply */
10444 1, /* casedn_multiply */
10445 1, /* mbminlen */
10446 4, /* mbmaxlen */
10447 1, /* mbmaxlenlen */
10448 9, /* min_sort_char */
10449 0x10FFFF, /* max_sort_char */
10450 ' ', /* pad char */
10451 false, /* escape_with_backslash_is_dangerous */
10452 3, /* levels_for_compare */
10453 &my_charset_utf8mb4_handler,
10454 &my_collation_uca_900_handler,
10455 NO_PAD};
10456
/*
  CHARSET_INFO for MY_UTF8MB4 "_is_0900_as_cs" (number 280).
  Tailoring: is_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10457 CHARSET_INFO my_charset_utf8mb4_is_0900_as_cs = {
10458 280,
10459 0,
10460 0, /* number */
10461 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10462 MY_UTF8MB4, /* csname */
10463 MY_UTF8MB4 "_is_0900_as_cs", /* m_coll_name */
10464 "", /* comment */
10465 is_cldr_30, /* tailoring */
10466 nullptr, /* coll_param */
10467 ctype_utf8, /* ctype */
10468 nullptr, /* to_lower */
10469 nullptr, /* to_upper */
10470 nullptr, /* sort_order */
10471 &my_uca_v900, /* uca */
10472 nullptr, /* tab_to_uni */
10473 nullptr, /* tab_from_uni */
10474 &my_unicase_unicode900, /* caseinfo */
10475 nullptr, /* state_map */
10476 nullptr, /* ident_map */
10477 0, /* strxfrm_multiply */
10478 1, /* caseup_multiply */
10479 1, /* casedn_multiply */
10480 1, /* mbminlen */
10481 4, /* mbmaxlen */
10482 1, /* mbmaxlenlen */
10483 9, /* min_sort_char */
10484 0x10FFFF, /* max_sort_char */
10485 ' ', /* pad char */
10486 false, /* escape_with_backslash_is_dangerous */
10487 3, /* levels_for_compare */
10488 &my_charset_utf8mb4_handler,
10489 &my_collation_uca_900_handler,
10490 NO_PAD};
10491
/*
  CHARSET_INFO for MY_UTF8MB4 "_lv_0900_as_cs" (number 281).
  Tailoring: lv_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10492 CHARSET_INFO my_charset_utf8mb4_lv_0900_as_cs = {
10493 281,
10494 0,
10495 0, /* number */
10496 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10497 MY_UTF8MB4, /* csname */
10498 MY_UTF8MB4 "_lv_0900_as_cs", /* m_coll_name */
10499 "", /* comment */
10500 lv_cldr_30, /* tailoring */
10501 nullptr, /* coll_param */
10502 ctype_utf8, /* ctype */
10503 nullptr, /* to_lower */
10504 nullptr, /* to_upper */
10505 nullptr, /* sort_order */
10506 &my_uca_v900, /* uca */
10507 nullptr, /* tab_to_uni */
10508 nullptr, /* tab_from_uni */
10509 &my_unicase_unicode900, /* caseinfo */
10510 nullptr, /* state_map */
10511 nullptr, /* ident_map */
10512 0, /* strxfrm_multiply */
10513 1, /* caseup_multiply */
10514 1, /* casedn_multiply */
10515 1, /* mbminlen */
10516 4, /* mbmaxlen */
10517 1, /* mbmaxlenlen */
10518 9, /* min_sort_char */
10519 0x10FFFF, /* max_sort_char */
10520 ' ', /* pad char */
10521 false, /* escape_with_backslash_is_dangerous */
10522 3, /* levels_for_compare */
10523 &my_charset_utf8mb4_handler,
10524 &my_collation_uca_900_handler,
10525 NO_PAD};
10526
/*
  CHARSET_INFO for MY_UTF8MB4 "_ro_0900_as_cs" (number 282).
  Tailoring: ro_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10527 CHARSET_INFO my_charset_utf8mb4_ro_0900_as_cs = {
10528 282,
10529 0,
10530 0, /* number */
10531 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10532 MY_UTF8MB4, /* csname */
10533 MY_UTF8MB4 "_ro_0900_as_cs", /* m_coll_name */
10534 "", /* comment */
10535 ro_cldr_30, /* tailoring */
10536 nullptr, /* coll_param */
10537 ctype_utf8, /* ctype */
10538 nullptr, /* to_lower */
10539 nullptr, /* to_upper */
10540 nullptr, /* sort_order */
10541 &my_uca_v900, /* uca */
10542 nullptr, /* tab_to_uni */
10543 nullptr, /* tab_from_uni */
10544 &my_unicase_unicode900, /* caseinfo */
10545 nullptr, /* state_map */
10546 nullptr, /* ident_map */
10547 0, /* strxfrm_multiply */
10548 1, /* caseup_multiply */
10549 1, /* casedn_multiply */
10550 1, /* mbminlen */
10551 4, /* mbmaxlen */
10552 1, /* mbmaxlenlen */
10553 9, /* min_sort_char */
10554 0x10FFFF, /* max_sort_char */
10555 ' ', /* pad char */
10556 false, /* escape_with_backslash_is_dangerous */
10557 3, /* levels_for_compare */
10558 &my_charset_utf8mb4_handler,
10559 &my_collation_uca_900_handler,
10560 NO_PAD};
10561
/*
  CHARSET_INFO for MY_UTF8MB4 "_sl_0900_as_cs" (number 283).
  Tailoring: sl_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10562 CHARSET_INFO my_charset_utf8mb4_sl_0900_as_cs = {
10563 283,
10564 0,
10565 0, /* number */
10566 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10567 MY_UTF8MB4, /* csname */
10568 MY_UTF8MB4 "_sl_0900_as_cs", /* m_coll_name */
10569 "", /* comment */
10570 sl_cldr_30, /* tailoring */
10571 nullptr, /* coll_param */
10572 ctype_utf8, /* ctype */
10573 nullptr, /* to_lower */
10574 nullptr, /* to_upper */
10575 nullptr, /* sort_order */
10576 &my_uca_v900, /* uca */
10577 nullptr, /* tab_to_uni */
10578 nullptr, /* tab_from_uni */
10579 &my_unicase_unicode900, /* caseinfo */
10580 nullptr, /* state_map */
10581 nullptr, /* ident_map */
10582 0, /* strxfrm_multiply */
10583 1, /* caseup_multiply */
10584 1, /* casedn_multiply */
10585 1, /* mbminlen */
10586 4, /* mbmaxlen */
10587 1, /* mbmaxlenlen */
10588 9, /* min_sort_char */
10589 0x10FFFF, /* max_sort_char */
10590 ' ', /* pad char */
10591 false, /* escape_with_backslash_is_dangerous */
10592 3, /* levels_for_compare */
10593 &my_charset_utf8mb4_handler,
10594 &my_collation_uca_900_handler,
10595 NO_PAD};
10596
/*
  CHARSET_INFO for MY_UTF8MB4 "_pl_0900_as_cs" (number 284).
  Tailoring: pl_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10597 CHARSET_INFO my_charset_utf8mb4_pl_0900_as_cs = {
10598 284,
10599 0,
10600 0, /* number */
10601 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10602 MY_UTF8MB4, /* csname */
10603 MY_UTF8MB4 "_pl_0900_as_cs", /* m_coll_name */
10604 "", /* comment */
10605 pl_cldr_30, /* tailoring */
10606 nullptr, /* coll_param */
10607 ctype_utf8, /* ctype */
10608 nullptr, /* to_lower */
10609 nullptr, /* to_upper */
10610 nullptr, /* sort_order */
10611 &my_uca_v900, /* uca */
10612 nullptr, /* tab_to_uni */
10613 nullptr, /* tab_from_uni */
10614 &my_unicase_unicode900, /* caseinfo */
10615 nullptr, /* state_map */
10616 nullptr, /* ident_map */
10617 0, /* strxfrm_multiply */
10618 1, /* caseup_multiply */
10619 1, /* casedn_multiply */
10620 1, /* mbminlen */
10621 4, /* mbmaxlen */
10622 1, /* mbmaxlenlen */
10623 9, /* min_sort_char */
10624 0x10FFFF, /* max_sort_char */
10625 ' ', /* pad char */
10626 false, /* escape_with_backslash_is_dangerous */
10627 3, /* levels_for_compare */
10628 &my_charset_utf8mb4_handler,
10629 &my_collation_uca_900_handler,
10630 NO_PAD};
10631
/*
  CHARSET_INFO for MY_UTF8MB4 "_et_0900_as_cs" (number 285).
  Tailoring: et_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10632 CHARSET_INFO my_charset_utf8mb4_et_0900_as_cs = {
10633 285,
10634 0,
10635 0, /* number */
10636 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10637 MY_UTF8MB4, /* csname */
10638 MY_UTF8MB4 "_et_0900_as_cs", /* m_coll_name */
10639 "", /* comment */
10640 et_cldr_30, /* tailoring */
10641 nullptr, /* coll_param */
10642 ctype_utf8, /* ctype */
10643 nullptr, /* to_lower */
10644 nullptr, /* to_upper */
10645 nullptr, /* sort_order */
10646 &my_uca_v900, /* uca */
10647 nullptr, /* tab_to_uni */
10648 nullptr, /* tab_from_uni */
10649 &my_unicase_unicode900, /* caseinfo */
10650 nullptr, /* state_map */
10651 nullptr, /* ident_map */
10652 0, /* strxfrm_multiply */
10653 1, /* caseup_multiply */
10654 1, /* casedn_multiply */
10655 1, /* mbminlen */
10656 4, /* mbmaxlen */
10657 1, /* mbmaxlenlen */
10658 9, /* min_sort_char */
10659 0x10FFFF, /* max_sort_char */
10660 ' ', /* pad char */
10661 false, /* escape_with_backslash_is_dangerous */
10662 3, /* levels_for_compare */
10663 &my_charset_utf8mb4_handler,
10664 &my_collation_uca_900_handler,
10665 NO_PAD};
10666
/*
  CHARSET_INFO for MY_UTF8MB4 "_es_0900_as_cs" (number 286).
  Tailoring: spanish (not a *_cldr_30 identifier like most neighbouring
  definitions); no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10667 CHARSET_INFO my_charset_utf8mb4_es_0900_as_cs = {
10668 286,
10669 0,
10670 0, /* number */
10671 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10672 MY_UTF8MB4, /* csname */
10673 MY_UTF8MB4 "_es_0900_as_cs", /* m_coll_name */
10674 "", /* comment */
10675 spanish, /* tailoring */
10676 nullptr, /* coll_param */
10677 ctype_utf8, /* ctype */
10678 nullptr, /* to_lower */
10679 nullptr, /* to_upper */
10680 nullptr, /* sort_order */
10681 &my_uca_v900, /* uca */
10682 nullptr, /* tab_to_uni */
10683 nullptr, /* tab_from_uni */
10684 &my_unicase_unicode900, /* caseinfo */
10685 nullptr, /* state_map */
10686 nullptr, /* ident_map */
10687 0, /* strxfrm_multiply */
10688 1, /* caseup_multiply */
10689 1, /* casedn_multiply */
10690 1, /* mbminlen */
10691 4, /* mbmaxlen */
10692 1, /* mbmaxlenlen */
10693 9, /* min_sort_char */
10694 0x10FFFF, /* max_sort_char */
10695 ' ', /* pad char */
10696 false, /* escape_with_backslash_is_dangerous */
10697 3, /* levels_for_compare */
10698 &my_charset_utf8mb4_handler,
10699 &my_collation_uca_900_handler,
10700 NO_PAD};
10701
/*
  CHARSET_INFO for MY_UTF8MB4 "_sv_0900_as_cs" (number 287).
  Tailoring: sv_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10702 CHARSET_INFO my_charset_utf8mb4_sv_0900_as_cs = {
10703 287,
10704 0,
10705 0, /* number */
10706 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10707 MY_UTF8MB4, /* csname */
10708 MY_UTF8MB4 "_sv_0900_as_cs", /* m_coll_name */
10709 "", /* comment */
10710 sv_cldr_30, /* tailoring */
10711 nullptr, /* coll_param */
10712 ctype_utf8, /* ctype */
10713 nullptr, /* to_lower */
10714 nullptr, /* to_upper */
10715 nullptr, /* sort_order */
10716 &my_uca_v900, /* uca */
10717 nullptr, /* tab_to_uni */
10718 nullptr, /* tab_from_uni */
10719 &my_unicase_unicode900, /* caseinfo */
10720 nullptr, /* state_map */
10721 nullptr, /* ident_map */
10722 0, /* strxfrm_multiply */
10723 1, /* caseup_multiply */
10724 1, /* casedn_multiply */
10725 1, /* mbminlen */
10726 4, /* mbmaxlen */
10727 1, /* mbmaxlenlen */
10728 9, /* min_sort_char */
10729 0x10FFFF, /* max_sort_char */
10730 ' ', /* pad char */
10731 false, /* escape_with_backslash_is_dangerous */
10732 3, /* levels_for_compare */
10733 &my_charset_utf8mb4_handler,
10734 &my_collation_uca_900_handler,
10735 NO_PAD};
10736
/*
  CHARSET_INFO for MY_UTF8MB4 "_tr_0900_as_cs" (number 288).
  Tailoring: tr_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10737 CHARSET_INFO my_charset_utf8mb4_tr_0900_as_cs = {
10738 288,
10739 0,
10740 0, /* number */
10741 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10742 MY_UTF8MB4, /* csname */
10743 MY_UTF8MB4 "_tr_0900_as_cs", /* m_coll_name */
10744 "", /* comment */
10745 tr_cldr_30, /* tailoring */
10746 nullptr, /* coll_param */
10747 ctype_utf8, /* ctype */
10748 nullptr, /* to_lower */
10749 nullptr, /* to_upper */
10750 nullptr, /* sort_order */
10751 &my_uca_v900, /* uca */
10752 nullptr, /* tab_to_uni */
10753 nullptr, /* tab_from_uni */
10754 &my_unicase_unicode900, /* caseinfo */
10755 nullptr, /* state_map */
10756 nullptr, /* ident_map */
10757 0, /* strxfrm_multiply */
10758 1, /* caseup_multiply */
10759 1, /* casedn_multiply */
10760 1, /* mbminlen */
10761 4, /* mbmaxlen */
10762 1, /* mbmaxlenlen */
10763 9, /* min_sort_char */
10764 0x10FFFF, /* max_sort_char */
10765 ' ', /* pad char */
10766 false, /* escape_with_backslash_is_dangerous */
10767 3, /* levels_for_compare */
10768 &my_charset_utf8mb4_handler,
10769 &my_collation_uca_900_handler,
10770 NO_PAD};
10771
/*
  CHARSET_INFO for MY_UTF8MB4 "_cs_0900_as_cs" (number 289).
  Tailoring: cs_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10772 CHARSET_INFO my_charset_utf8mb4_cs_0900_as_cs = {
10773 289,
10774 0,
10775 0, /* number */
10776 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10777 MY_UTF8MB4, /* csname */
10778 MY_UTF8MB4 "_cs_0900_as_cs", /* m_coll_name */
10779 "", /* comment */
10780 cs_cldr_30, /* tailoring */
10781 nullptr, /* coll_param */
10782 ctype_utf8, /* ctype */
10783 nullptr, /* to_lower */
10784 nullptr, /* to_upper */
10785 nullptr, /* sort_order */
10786 &my_uca_v900, /* uca */
10787 nullptr, /* tab_to_uni */
10788 nullptr, /* tab_from_uni */
10789 &my_unicase_unicode900, /* caseinfo */
10790 nullptr, /* state_map */
10791 nullptr, /* ident_map */
10792 0, /* strxfrm_multiply */
10793 1, /* caseup_multiply */
10794 1, /* casedn_multiply */
10795 1, /* mbminlen */
10796 4, /* mbmaxlen */
10797 1, /* mbmaxlenlen */
10798 9, /* min_sort_char */
10799 0x10FFFF, /* max_sort_char */
10800 ' ', /* pad char */
10801 false, /* escape_with_backslash_is_dangerous */
10802 3, /* levels_for_compare */
10803 &my_charset_utf8mb4_handler,
10804 &my_collation_uca_900_handler,
10805 NO_PAD};
10806
/*
  CHARSET_INFO for MY_UTF8MB4 "_da_0900_as_cs" (number 290).
  Tailoring: da_cldr_30; coll_param: da_coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10807 CHARSET_INFO my_charset_utf8mb4_da_0900_as_cs = {
10808 290,
10809 0,
10810 0, /* number */
10811 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10812 MY_UTF8MB4, /* csname */
10813 MY_UTF8MB4 "_da_0900_as_cs", /* m_coll_name */
10814 "", /* comment */
10815 da_cldr_30, /* tailoring */
10816 &da_coll_param, /* coll_param */
10817 ctype_utf8, /* ctype */
10818 nullptr, /* to_lower */
10819 nullptr, /* to_upper */
10820 nullptr, /* sort_order */
10821 &my_uca_v900, /* uca */
10822 nullptr, /* tab_to_uni */
10823 nullptr, /* tab_from_uni */
10824 &my_unicase_unicode900, /* caseinfo */
10825 nullptr, /* state_map */
10826 nullptr, /* ident_map */
10827 0, /* strxfrm_multiply */
10828 1, /* caseup_multiply */
10829 1, /* casedn_multiply */
10830 1, /* mbminlen */
10831 4, /* mbmaxlen */
10832 1, /* mbmaxlenlen */
10833 9, /* min_sort_char */
10834 0x10FFFF, /* max_sort_char */
10835 ' ', /* pad char */
10836 false, /* escape_with_backslash_is_dangerous */
10837 3, /* levels_for_compare */
10838 &my_charset_utf8mb4_handler,
10839 &my_collation_uca_900_handler,
10840 NO_PAD};
10841
/*
  CHARSET_INFO for MY_UTF8MB4 "_lt_0900_as_cs" (number 291).
  Tailoring: lt_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10842 CHARSET_INFO my_charset_utf8mb4_lt_0900_as_cs = {
10843 291,
10844 0,
10845 0, /* number */
10846 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10847 MY_UTF8MB4, /* csname */
10848 MY_UTF8MB4 "_lt_0900_as_cs", /* m_coll_name */
10849 "", /* comment */
10850 lt_cldr_30, /* tailoring */
10851 nullptr, /* coll_param */
10852 ctype_utf8, /* ctype */
10853 nullptr, /* to_lower */
10854 nullptr, /* to_upper */
10855 nullptr, /* sort_order */
10856 &my_uca_v900, /* uca */
10857 nullptr, /* tab_to_uni */
10858 nullptr, /* tab_from_uni */
10859 &my_unicase_unicode900, /* caseinfo */
10860 nullptr, /* state_map */
10861 nullptr, /* ident_map */
10862 0, /* strxfrm_multiply */
10863 1, /* caseup_multiply */
10864 1, /* casedn_multiply */
10865 1, /* mbminlen */
10866 4, /* mbmaxlen */
10867 1, /* mbmaxlenlen */
10868 9, /* min_sort_char */
10869 0x10FFFF, /* max_sort_char */
10870 ' ', /* pad char */
10871 false, /* escape_with_backslash_is_dangerous */
10872 3, /* levels_for_compare */
10873 &my_charset_utf8mb4_handler,
10874 &my_collation_uca_900_handler,
10875 NO_PAD};
10876
/*
  CHARSET_INFO for MY_UTF8MB4 "_sk_0900_as_cs" (number 292).
  Tailoring: sk_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10877 CHARSET_INFO my_charset_utf8mb4_sk_0900_as_cs = {
10878 292,
10879 0,
10880 0, /* number */
10881 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10882 MY_UTF8MB4, /* csname */
10883 MY_UTF8MB4 "_sk_0900_as_cs", /* m_coll_name */
10884 "", /* comment */
10885 sk_cldr_30, /* tailoring */
10886 nullptr, /* coll_param */
10887 ctype_utf8, /* ctype */
10888 nullptr, /* to_lower */
10889 nullptr, /* to_upper */
10890 nullptr, /* sort_order */
10891 &my_uca_v900, /* uca */
10892 nullptr, /* tab_to_uni */
10893 nullptr, /* tab_from_uni */
10894 &my_unicase_unicode900, /* caseinfo */
10895 nullptr, /* state_map */
10896 nullptr, /* ident_map */
10897 0, /* strxfrm_multiply */
10898 1, /* caseup_multiply */
10899 1, /* casedn_multiply */
10900 1, /* mbminlen */
10901 4, /* mbmaxlen */
10902 1, /* mbmaxlenlen */
10903 9, /* min_sort_char */
10904 0x10FFFF, /* max_sort_char */
10905 ' ', /* pad char */
10906 false, /* escape_with_backslash_is_dangerous */
10907 3, /* levels_for_compare */
10908 &my_charset_utf8mb4_handler,
10909 &my_collation_uca_900_handler,
10910 NO_PAD};
10911
/*
  CHARSET_INFO for MY_UTF8MB4 "_es_trad_0900_as_cs" (number 293).
  Tailoring: es_trad_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10912 CHARSET_INFO my_charset_utf8mb4_es_trad_0900_as_cs = {
10913 293,
10914 0,
10915 0, /* number */
10916 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10917 MY_UTF8MB4, /* csname */
10918 MY_UTF8MB4 "_es_trad_0900_as_cs", /* m_coll_name */
10919 "", /* comment */
10920 es_trad_cldr_30, /* tailoring */
10921 nullptr, /* coll_param */
10922 ctype_utf8, /* ctype */
10923 nullptr, /* to_lower */
10924 nullptr, /* to_upper */
10925 nullptr, /* sort_order */
10926 &my_uca_v900, /* uca */
10927 nullptr, /* tab_to_uni */
10928 nullptr, /* tab_from_uni */
10929 &my_unicase_unicode900, /* caseinfo */
10930 nullptr, /* state_map */
10931 nullptr, /* ident_map */
10932 0, /* strxfrm_multiply */
10933 1, /* caseup_multiply */
10934 1, /* casedn_multiply */
10935 1, /* mbminlen */
10936 4, /* mbmaxlen */
10937 1, /* mbmaxlenlen */
10938 9, /* min_sort_char */
10939 0x10FFFF, /* max_sort_char */
10940 ' ', /* pad char */
10941 false, /* escape_with_backslash_is_dangerous */
10942 3, /* levels_for_compare */
10943 &my_charset_utf8mb4_handler,
10944 &my_collation_uca_900_handler,
10945 NO_PAD};
10946
/*
  CHARSET_INFO for MY_UTF8MB4 "_la_0900_as_cs" (number 294).
  Tailoring: roman (not a *_cldr_30 identifier like most neighbouring
  definitions); no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10947 CHARSET_INFO my_charset_utf8mb4_la_0900_as_cs = {
10948 294,
10949 0,
10950 0, /* number */
10951 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10952 MY_UTF8MB4, /* csname */
10953 MY_UTF8MB4 "_la_0900_as_cs", /* m_coll_name */
10954 "", /* comment */
10955 roman, /* tailoring */
10956 nullptr, /* coll_param */
10957 ctype_utf8, /* ctype */
10958 nullptr, /* to_lower */
10959 nullptr, /* to_upper */
10960 nullptr, /* sort_order */
10961 &my_uca_v900, /* uca */
10962 nullptr, /* tab_to_uni */
10963 nullptr, /* tab_from_uni */
10964 &my_unicase_unicode900, /* caseinfo */
10965 nullptr, /* state_map */
10966 nullptr, /* ident_map */
10967 0, /* strxfrm_multiply */
10968 1, /* caseup_multiply */
10969 1, /* casedn_multiply */
10970 1, /* mbminlen */
10971 4, /* mbmaxlen */
10972 1, /* mbmaxlenlen */
10973 9, /* min_sort_char */
10974 0x10FFFF, /* max_sort_char */
10975 ' ', /* pad char */
10976 false, /* escape_with_backslash_is_dangerous */
10977 3, /* levels_for_compare */
10978 &my_charset_utf8mb4_handler,
10979 &my_collation_uca_900_handler,
10980 NO_PAD};
10981
/*
  Compiled out by the surrounding #if 0: CHARSET_INFO for
  MY_UTF8MB4 "_fa_0900_as_cs" (number 295).
  Tailoring: fa_cldr_30; coll_param: fa_coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
10982 #if 0
10983 CHARSET_INFO my_charset_utf8mb4_fa_0900_as_cs=
10984 {
10985 295, 0, 0, /* number */
10986 MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */
10987 MY_UTF8MB4, /* csname */
10988 MY_UTF8MB4 "_fa_0900_as_cs",/* m_coll_name */
10989 "", /* comment */
10990 fa_cldr_30, /* tailoring */
10991 &fa_coll_param, /* coll_param */
10992 ctype_utf8, /* ctype */
10993 NULL, /* to_lower */
10994 NULL, /* to_upper */
10995 NULL, /* sort_order */
10996 &my_uca_v900, /* uca */
10997 NULL, /* tab_to_uni */
10998 NULL, /* tab_from_uni */
10999 &my_unicase_unicode900,/* caseinfo */
11000 NULL, /* state_map */
11001 NULL, /* ident_map */
11002 0, /* strxfrm_multiply */
11003 1, /* caseup_multiply */
11004 1, /* casedn_multiply */
11005 1, /* mbminlen */
11006 4, /* mbmaxlen */
11007 1, /* mbmaxlenlen */
11008 9, /* min_sort_char */
11009 0x10FFFF, /* max_sort_char */
11010 ' ', /* pad char */
11011 0, /* escape_with_backslash_is_dangerous */
11012 3, /* levels_for_compare */
11013 &my_charset_utf8mb4_handler,
11014 &my_collation_uca_900_handler,
11015 NO_PAD
11016 };
11017 #endif
11018
/*
  CHARSET_INFO for MY_UTF8MB4 "_eo_0900_as_cs" (number 296).
  Tailoring: esperanto (not a *_cldr_30 identifier like most neighbouring
  definitions); no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
11019 CHARSET_INFO my_charset_utf8mb4_eo_0900_as_cs = {
11020 296,
11021 0,
11022 0, /* number */
11023 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11024 MY_UTF8MB4, /* csname */
11025 MY_UTF8MB4 "_eo_0900_as_cs", /* m_coll_name */
11026 "", /* comment */
11027 esperanto, /* tailoring */
11028 nullptr, /* coll_param */
11029 ctype_utf8, /* ctype */
11030 nullptr, /* to_lower */
11031 nullptr, /* to_upper */
11032 nullptr, /* sort_order */
11033 &my_uca_v900, /* uca */
11034 nullptr, /* tab_to_uni */
11035 nullptr, /* tab_from_uni */
11036 &my_unicase_unicode900, /* caseinfo */
11037 nullptr, /* state_map */
11038 nullptr, /* ident_map */
11039 0, /* strxfrm_multiply */
11040 1, /* caseup_multiply */
11041 1, /* casedn_multiply */
11042 1, /* mbminlen */
11043 4, /* mbmaxlen */
11044 1, /* mbmaxlenlen */
11045 9, /* min_sort_char */
11046 0x10FFFF, /* max_sort_char */
11047 ' ', /* pad char */
11048 false, /* escape_with_backslash_is_dangerous */
11049 3, /* levels_for_compare */
11050 &my_charset_utf8mb4_handler,
11051 &my_collation_uca_900_handler,
11052 NO_PAD};
11053
/*
  CHARSET_INFO for MY_UTF8MB4 "_hu_0900_as_cs" (number 297).
  Tailoring: hu_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
11054 CHARSET_INFO my_charset_utf8mb4_hu_0900_as_cs = {
11055 297,
11056 0,
11057 0, /* number */
11058 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11059 MY_UTF8MB4, /* csname */
11060 MY_UTF8MB4 "_hu_0900_as_cs", /* m_coll_name */
11061 "", /* comment */
11062 hu_cldr_30, /* tailoring */
11063 nullptr, /* coll_param */
11064 ctype_utf8, /* ctype */
11065 nullptr, /* to_lower */
11066 nullptr, /* to_upper */
11067 nullptr, /* sort_order */
11068 &my_uca_v900, /* uca */
11069 nullptr, /* tab_to_uni */
11070 nullptr, /* tab_from_uni */
11071 &my_unicase_unicode900, /* caseinfo */
11072 nullptr, /* state_map */
11073 nullptr, /* ident_map */
11074 0, /* strxfrm_multiply */
11075 1, /* caseup_multiply */
11076 1, /* casedn_multiply */
11077 1, /* mbminlen */
11078 4, /* mbmaxlen */
11079 1, /* mbmaxlenlen */
11080 9, /* min_sort_char */
11081 0x10FFFF, /* max_sort_char */
11082 ' ', /* pad char */
11083 false, /* escape_with_backslash_is_dangerous */
11084 3, /* levels_for_compare */
11085 &my_charset_utf8mb4_handler,
11086 &my_collation_uca_900_handler,
11087 NO_PAD};
11088
/*
  CHARSET_INFO for MY_UTF8MB4 "_hr_0900_as_cs" (number 298).
  Tailoring: hr_cldr_30; coll_param: hr_coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
11089 CHARSET_INFO my_charset_utf8mb4_hr_0900_as_cs = {
11090 298,
11091 0,
11092 0, /* number */
11093 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11094 MY_UTF8MB4, /* csname */
11095 MY_UTF8MB4 "_hr_0900_as_cs", /* m_coll_name */
11096 "", /* comment */
11097 hr_cldr_30, /* tailoring */
11098 &hr_coll_param, /* coll_param */
11099 ctype_utf8, /* ctype */
11100 nullptr, /* to_lower */
11101 nullptr, /* to_upper */
11102 nullptr, /* sort_order */
11103 &my_uca_v900, /* uca */
11104 nullptr, /* tab_to_uni */
11105 nullptr, /* tab_from_uni */
11106 &my_unicase_unicode900, /* caseinfo */
11107 nullptr, /* state_map */
11108 nullptr, /* ident_map */
11109 0, /* strxfrm_multiply */
11110 1, /* caseup_multiply */
11111 1, /* casedn_multiply */
11112 1, /* mbminlen */
11113 4, /* mbmaxlen */
11114 1, /* mbmaxlenlen */
11115 9, /* min_sort_char */
11116 0x10FFFF, /* max_sort_char */
11117 ' ', /* pad char */
11118 false, /* escape_with_backslash_is_dangerous */
11119 3, /* levels_for_compare */
11120 &my_charset_utf8mb4_handler,
11121 &my_collation_uca_900_handler,
11122 NO_PAD};
11123
/*
  Compiled out by the surrounding #if 0: CHARSET_INFO for
  MY_UTF8MB4 "_si_0900_as_cs" (number 299).
  Tailoring: si_cldr_30; no coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
11124 #if 0
11125 CHARSET_INFO my_charset_utf8mb4_si_0900_as_cs=
11126 {
11127 299, 0, 0, /* number */
11128 MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */
11129 MY_UTF8MB4, /* csname */
11130 MY_UTF8MB4 "_si_0900_as_cs",/* m_coll_name */
11131 "", /* comment */
11132 si_cldr_30, /* tailoring */
11133 NULL, /* coll_param */
11134 ctype_utf8, /* ctype */
11135 NULL, /* to_lower */
11136 NULL, /* to_upper */
11137 NULL, /* sort_order */
11138 &my_uca_v900, /* uca */
11139 NULL, /* tab_to_uni */
11140 NULL, /* tab_from_uni */
11141 &my_unicase_unicode900,/* caseinfo */
11142 NULL, /* state_map */
11143 NULL, /* ident_map */
11144 0, /* strxfrm_multiply */
11145 1, /* caseup_multiply */
11146 1, /* casedn_multiply */
11147 1, /* mbminlen */
11148 4, /* mbmaxlen */
11149 1, /* mbmaxlenlen */
11150 9, /* min_sort_char */
11151 0x10FFFF, /* max_sort_char */
11152 ' ', /* pad char */
11153 0, /* escape_with_backslash_is_dangerous */
11154 3, /* levels_for_compare */
11155 &my_charset_utf8mb4_handler,
11156 &my_collation_uca_900_handler,
11157 NO_PAD
11158 };
11159 #endif
11160
/*
  CHARSET_INFO for MY_UTF8MB4 "_vi_0900_as_cs" (number 300).
  Tailoring: vi_cldr_30; coll_param: vi_coll_param. State adds MY_CS_CSSORT;
  levels_for_compare is 3; NO_PAD.
*/
11161 CHARSET_INFO my_charset_utf8mb4_vi_0900_as_cs = {
11162 300,
11163 0,
11164 0, /* number */
11165 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11166 MY_UTF8MB4, /* csname */
11167 MY_UTF8MB4 "_vi_0900_as_cs", /* m_coll_name */
11168 "", /* comment */
11169 vi_cldr_30, /* tailoring */
11170 &vi_coll_param, /* coll_param */
11171 ctype_utf8, /* ctype */
11172 nullptr, /* to_lower */
11173 nullptr, /* to_upper */
11174 nullptr, /* sort_order */
11175 &my_uca_v900, /* uca */
11176 nullptr, /* tab_to_uni */
11177 nullptr, /* tab_from_uni */
11178 &my_unicase_unicode900, /* caseinfo */
11179 nullptr, /* state_map */
11180 nullptr, /* ident_map */
11181 0, /* strxfrm_multiply */
11182 1, /* caseup_multiply */
11183 1, /* casedn_multiply */
11184 1, /* mbminlen */
11185 4, /* mbmaxlen */
11186 1, /* mbmaxlenlen */
11187 9, /* min_sort_char */
11188 0x10FFFF, /* max_sort_char */
11189 ' ', /* pad char */
11190 false, /* escape_with_backslash_is_dangerous */
11191 3, /* levels_for_compare */
11192 &my_charset_utf8mb4_handler,
11193 &my_collation_uca_900_handler,
11194 NO_PAD};
11195
/*
  CHARSET_INFO for MY_UTF8MB4 "_ja_0900_as_cs" (number 303).
  Tailoring: ja_cldr_30; coll_param: ja_coll_param. State adds MY_CS_CSSORT;
  min_sort_char is 32 (the definitions numbered below 303 use 9);
  levels_for_compare is 3; NO_PAD.
*/
11196 CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs = {
11197 303,
11198 0,
11199 0, /* number */
11200 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11201 MY_UTF8MB4, /* csname */
11202 MY_UTF8MB4 "_ja_0900_as_cs", /* m_coll_name */
11203 "", /* comment */
11204 ja_cldr_30, /* tailoring */
11205 &ja_coll_param, /* coll_param */
11206 ctype_utf8, /* ctype */
11207 nullptr, /* to_lower */
11208 nullptr, /* to_upper */
11209 nullptr, /* sort_order */
11210 &my_uca_v900, /* uca */
11211 nullptr, /* tab_to_uni */
11212 nullptr, /* tab_from_uni */
11213 &my_unicase_unicode900, /* caseinfo */
11214 nullptr, /* state_map */
11215 nullptr, /* ident_map */
11216 0, /* strxfrm_multiply */
11217 1, /* caseup_multiply */
11218 1, /* casedn_multiply */
11219 1, /* mbminlen */
11220 4, /* mbmaxlen */
11221 1, /* mbmaxlenlen */
11222 32, /* min_sort_char */
11223 0x10FFFF, /* max_sort_char */
11224 ' ', /* pad char */
11225 false, /* escape_with_backslash_is_dangerous */
11226 3, /* levels_for_compare */
11227 &my_charset_utf8mb4_handler,
11228 &my_collation_uca_900_handler,
11229 NO_PAD};
11230
/*
  CHARSET_INFO for MY_UTF8MB4 "_ja_0900_as_cs_ks" (number 304).
  Same tailoring (ja_cldr_30) and coll_param (ja_coll_param) as
  "_ja_0900_as_cs", but with strxfrm_multiply set to 24 and
  levels_for_compare set to 4. State adds MY_CS_CSSORT; min_sort_char is 32;
  NO_PAD.
*/
11231 CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks = {
11232 304,
11233 0,
11234 0, /* number */
11235 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11236 MY_UTF8MB4, /* csname */
11237 MY_UTF8MB4 "_ja_0900_as_cs_ks", /* m_coll_name */
11238 "", /* comment */
11239 ja_cldr_30, /* tailoring */
11240 &ja_coll_param, /* coll_param */
11241 ctype_utf8, /* ctype */
11242 nullptr, /* to_lower */
11243 nullptr, /* to_upper */
11244 nullptr, /* sort_order */
11245 &my_uca_v900, /* uca */
11246 nullptr, /* tab_to_uni */
11247 nullptr, /* tab_from_uni */
11248 &my_unicase_unicode900, /* caseinfo */
11249 nullptr, /* state_map */
11250 nullptr, /* ident_map */
11251 24, /* strxfrm_multiply */
11252 1, /* caseup_multiply */
11253 1, /* casedn_multiply */
11254 1, /* mbminlen */
11255 4, /* mbmaxlen */
11256 1, /* mbmaxlenlen */
11257 32, /* min_sort_char */
11258 0x10FFFF, /* max_sort_char */
11259 ' ', /* pad char */
11260 false, /* escape_with_backslash_is_dangerous */
11261 4, /* levels_for_compare */
11262 &my_charset_utf8mb4_handler,
11263 &my_collation_uca_900_handler,
11264 NO_PAD};
11265
/*
  CHARSET_INFO for MY_UTF8MB4 "_0900_as_ci" (number 305).
  No tailoring and no coll_param. State is MY_CS_UTF8MB4_UCA_FLAGS without
  MY_CS_CSSORT; min_sort_char is 32; levels_for_compare is 2; NO_PAD.
*/
11266 CHARSET_INFO my_charset_utf8mb4_0900_as_ci = {
11267 305,
11268 0,
11269 0, /* number */
11270 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11271 MY_UTF8MB4, /* csname */
11272 MY_UTF8MB4 "_0900_as_ci", /* m_coll_name */
11273 "", /* comment */
11274 nullptr, /* tailoring */
11275 nullptr, /* coll_param */
11276 ctype_utf8, /* ctype */
11277 nullptr, /* to_lower */
11278 nullptr, /* to_upper */
11279 nullptr, /* sort_order */
11280 &my_uca_v900, /* uca */
11281 nullptr, /* tab_to_uni */
11282 nullptr, /* tab_from_uni */
11283 &my_unicase_unicode900, /* caseinfo */
11284 nullptr, /* state_map */
11285 nullptr, /* ident_map */
11286 0, /* strxfrm_multiply */
11287 1, /* caseup_multiply */
11288 1, /* casedn_multiply */
11289 1, /* mbminlen */
11290 4, /* mbmaxlen */
11291 1, /* mbmaxlenlen */
11292 32, /* min_sort_char */
11293 0x10FFFF, /* max_sort_char */
11294 ' ', /* pad char */
11295 false, /* escape_with_backslash_is_dangerous */
11296 2, /* levels_for_compare */
11297 &my_charset_utf8mb4_handler,
11298 &my_collation_uca_900_handler,
11299 NO_PAD};
11300
/*
  CHARSET_INFO for MY_UTF8MB4 "_ru_0900_ai_ci" (number 306).
  Tailoring is the empty string (not nullptr); coll_param: ru_coll_param.
  State is MY_CS_UTF8MB4_UCA_FLAGS without MY_CS_CSSORT; min_sort_char is 32;
  levels_for_compare is 1; NO_PAD.
*/
11301 CHARSET_INFO my_charset_utf8mb4_ru_0900_ai_ci = {
11302 306,
11303 0,
11304 0, /* number */
11305 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11306 MY_UTF8MB4, /* csname */
11307 MY_UTF8MB4 "_ru_0900_ai_ci", /* m_coll_name */
11308 "", /* comment */
11309 "", /* tailoring */
11310 &ru_coll_param, /* coll_param */
11311 ctype_utf8, /* ctype */
11312 nullptr, /* to_lower */
11313 nullptr, /* to_upper */
11314 nullptr, /* sort_order */
11315 &my_uca_v900, /* uca */
11316 nullptr, /* tab_to_uni */
11317 nullptr, /* tab_from_uni */
11318 &my_unicase_unicode900, /* caseinfo */
11319 nullptr, /* state_map */
11320 nullptr, /* ident_map */
11321 0, /* strxfrm_multiply */
11322 1, /* caseup_multiply */
11323 1, /* casedn_multiply */
11324 1, /* mbminlen */
11325 4, /* mbmaxlen */
11326 1, /* mbmaxlenlen */
11327 32, /* min_sort_char */
11328 0x10FFFF, /* max_sort_char */
11329 ' ', /* pad char */
11330 false, /* escape_with_backslash_is_dangerous */
11331 1, /* levels_for_compare */
11332 &my_charset_utf8mb4_handler,
11333 &my_collation_uca_900_handler,
11334 NO_PAD};
11335
/*
  CHARSET_INFO for MY_UTF8MB4 "_ru_0900_as_cs" (number 307).
  Tailoring is the empty string (not nullptr); coll_param: ru_coll_param.
  State adds MY_CS_CSSORT; min_sort_char is 32; levels_for_compare is 3;
  NO_PAD.
*/
11336 CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs = {
11337 307,
11338 0,
11339 0, /* number */
11340 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11341 MY_UTF8MB4, /* csname */
11342 MY_UTF8MB4 "_ru_0900_as_cs", /* m_coll_name */
11343 "", /* comment */
11344 "", /* tailoring */
11345 &ru_coll_param, /* coll_param */
11346 ctype_utf8, /* ctype */
11347 nullptr, /* to_lower */
11348 nullptr, /* to_upper */
11349 nullptr, /* sort_order */
11350 &my_uca_v900, /* uca */
11351 nullptr, /* tab_to_uni */
11352 nullptr, /* tab_from_uni */
11353 &my_unicase_unicode900, /* caseinfo */
11354 nullptr, /* state_map */
11355 nullptr, /* ident_map */
11356 0, /* strxfrm_multiply */
11357 1, /* caseup_multiply */
11358 1, /* casedn_multiply */
11359 1, /* mbminlen */
11360 4, /* mbmaxlen */
11361 1, /* mbmaxlenlen */
11362 32, /* min_sort_char */
11363 0x10FFFF, /* max_sort_char */
11364 ' ', /* pad char */
11365 false, /* escape_with_backslash_is_dangerous */
11366 3, /* levels_for_compare */
11367 &my_charset_utf8mb4_handler,
11368 &my_collation_uca_900_handler,
11369 NO_PAD};
11370
/*
  CHARSET_INFO for MY_UTF8MB4 "_zh_0900_as_cs" (number 308).
  Tailoring: zh_cldr_30; coll_param: zh_coll_param. State adds MY_CS_CSSORT;
  min_sort_char is 32; levels_for_compare is 3; NO_PAD.
*/
11371 CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs = {
11372 308,
11373 0,
11374 0, /* number */
11375 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11376 MY_UTF8MB4, /* csname */
11377 MY_UTF8MB4 "_zh_0900_as_cs", /* m_coll_name */
11378 "", /* comment */
11379 zh_cldr_30, /* tailoring */
11380 &zh_coll_param, /* coll_param */
11381 ctype_utf8, /* ctype */
11382 nullptr, /* to_lower */
11383 nullptr, /* to_upper */
11384 nullptr, /* sort_order */
11385 &my_uca_v900, /* uca */
11386 nullptr, /* tab_to_uni */
11387 nullptr, /* tab_from_uni */
11388 &my_unicase_unicode900, /* caseinfo */
11389 nullptr, /* state_map */
11390 nullptr, /* ident_map */
11391 0, /* strxfrm_multiply */
11392 1, /* caseup_multiply */
11393 1, /* casedn_multiply */
11394 1, /* mbminlen */
11395 4, /* mbmaxlen */
11396 1, /* mbmaxlenlen */
11397 32, /* min_sort_char */
11398 0x10FFFF, /* max_sort_char */
11399 ' ', /* pad char */
11400 false, /* escape_with_backslash_is_dangerous */
11401 3, /* levels_for_compare */
11402 &my_charset_utf8mb4_handler,
11403 &my_collation_uca_900_handler,
11404 NO_PAD};
11405
11406 /*
11407 Comparing the UTF-8 representation automatically yields codepoint order,
11408 so we can just do a binary comparison. Note that
11409 my_strnxfrm_unicode_full_bin() chooses to transform to UCS before collation;
11410 this is purely for legacy reasons and is not needed here.
11411 */
/*
  Build the sort key for the binary utf8mb4 collation by copying the input
  bytes unchanged.

  @param cs        Unused.
  @param dst       Destination buffer for the key.
  @param dstlen    Size of dst in bytes.
  @param nweights  Unused.
  @param src       Source bytes; asserted to be non-null.
  @param srclen    Number of source bytes.
  @param flags     If MY_STRXFRM_PAD_TO_MAXLEN is set, the rest of dst is
                   zero-filled.

  @return dstlen when padding was requested, otherwise the number of bytes
          copied, which is min(srclen, dstlen).
*/
11412 1114694 static size_t my_strnxfrm_utf8mb4_0900_bin(const CHARSET_INFO *cs
11413 [[maybe_unused]],
11414 uchar *dst, size_t dstlen,
11415 uint nweights [[maybe_unused]],
11416 const uchar *src, size_t srclen,
11417 uint flags) {
11418
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1114694 times.
1114694 assert(src);
11419
/* The key is the raw byte string, truncated if dst is too small. */
11420 1114694 size_t weight_len = std::min<size_t>(srclen, dstlen);
11421 1114694 memcpy(dst, src, weight_len);
11422
/* When padding is requested, zero the tail and report the full buffer. */
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 1114658 times.
1114694 if (flags & MY_STRXFRM_PAD_TO_MAXLEN) {
11423 36 memset(dst + weight_len, 0, dstlen - weight_len);
11424 36 return dstlen;
11425 } else {
11426 1114658 return weight_len;
11427 }
11428 }
11429
/*
  Comparison entry point used in the strnncollsp slot of the binary
  utf8mb4 collation handler. It forwards unchanged to my_strnncoll_mb_bin(),
  passing false as the final argument.
  NOTE(review): the final argument is presumably the prefix-match flag of
  my_strnncoll_mb_bin() -- confirm against its declaration.

  @return whatever my_strnncoll_mb_bin() returns for (s, slen) vs (t, tlen).
*/
11430 1377549 static int my_strnncollsp_utf8mb4_0900_bin(const CHARSET_INFO *cs,
11431 const uchar *s, size_t slen,
11432 const uchar *t, size_t tlen) {
11433 1377549 return my_strnncoll_mb_bin(cs, s, slen, t, tlen, false);
11434 }
11435
/*
  Collation handler table for the binary utf8mb4 collation. Entries are
  positional. The first two slots are empty, the strnncollsp and strnxfrm
  slots use the utf8mb4_0900_bin functions defined in this file, and the
  remaining slots use generic multi-byte/simple helpers.
  NOTE(review): slot roles other than init are inferred from the function
  names -- confirm against the MY_COLLATION_HANDLER declaration.
*/
11436 static MY_COLLATION_HANDLER my_collation_utf8mb4_0900_bin_handler = {
11437 nullptr, /* init */
11438 nullptr,
11439 my_strnncoll_mb_bin,
11440 my_strnncollsp_utf8mb4_0900_bin,
11441 my_strnxfrm_utf8mb4_0900_bin,
11442 my_strnxfrmlen_simple,
11443 my_like_range_mb,
11444 my_wildcmp_mb_bin,
11445 my_strcasecmp_mb_bin,
11446 my_instr_mb,
11447 my_hash_sort_mb_bin,
11448 my_propagate_simple};
11449
/*
  Collation descriptor for the utf8mb4 collation with name suffix _0900_bin
  (first field 309). State is the common UCA flag set plus MY_CS_BINSORT.
  It has no tailoring, coll_param or uca table (all nullptr). Its
  strxfrm_multiply is 1 and min_sort_char is 0. Comparison uses 1 level
  through my_collation_utf8mb4_0900_bin_handler, and the last field is
  NO_PAD.
*/
11450 CHARSET_INFO my_charset_utf8mb4_0900_bin = {
11451 309,
11452 0,
11453 0, // number
11454 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_BINSORT, // state
11455 MY_UTF8MB4, // cs name
11456 MY_UTF8MB4 "_0900_bin", // name
11457 "", // comment
11458 nullptr, // tailoring
11459 nullptr, // coll_param
11460 ctype_utf8, // ctype
11461 nullptr, // to_lower
11462 nullptr, // to_upper
11463 nullptr, // sort_order
11464 nullptr, // uca
11465 nullptr, // tab_to_uni
11466 nullptr, // tab_from_uni
11467 &my_unicase_unicode900, // caseinfo
11468 nullptr, // state_map
11469 nullptr, // ident_map
11470 1, // strxfrm_multiply
11471 1, // caseup_multiply
11472 1, // casedn_multiply
11473 1, // mbminlen
11474 4, // mbmaxlen
11475 1, // mbmaxlenlen
11476 0, // min_sort_char
11477 0x10FFFF, // max_sort_char
11478 ' ', // pad char
11479 false, // escape_with_backslash_is_dangerous
11480 1, // levels_for_compare
11481 &my_charset_utf8mb4_handler,
11482 &my_collation_utf8mb4_0900_bin_handler,
11483 NO_PAD};
11484
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _nb_0900_ai_ci (first field 310). State is the common UCA flag set only.
  Tailoring is da_cldr_30 and coll_param is nullptr. Weights come from
  my_uca_v900, comparison uses 1 level, and the last field is NO_PAD.
*/
11485 CHARSET_INFO my_charset_utf8mb4_nb_0900_ai_ci = {
11486 310,
11487 0,
11488 0, /* number */
11489 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11490 MY_UTF8MB4, /* csname */
11491 MY_UTF8MB4 "_nb_0900_ai_ci", /* name */
11492 "", /* comment */
11493 da_cldr_30, /* tailoring */
11494 nullptr, /* coll_param */
11495 ctype_utf8, /* ctype */
11496 nullptr, /* to_lower */
11497 nullptr, /* to_upper */
11498 nullptr, /* sort_order */
11499 &my_uca_v900, /* uca */
11500 nullptr, /* tab_to_uni */
11501 nullptr, /* tab_from_uni */
11502 &my_unicase_unicode900, /* caseinfo */
11503 nullptr, /* state_map */
11504 nullptr, /* ident_map */
11505 0, /* strxfrm_multiply */
11506 1, /* caseup_multiply */
11507 1, /* casedn_multiply */
11508 1, /* mbminlen */
11509 4, /* mbmaxlen */
11510 1, /* mbmaxlenlen */
11511 9, /* min_sort_char */
11512 0x10FFFF, /* max_sort_char */
11513 ' ', /* pad char */
11514 false, /* escape_with_backslash_is_dangerous */
11515 1, /* levels_for_compare */
11516 &my_charset_utf8mb4_handler,
11517 &my_collation_uca_900_handler,
11518 NO_PAD};
11519
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _nb_0900_as_cs (first field 311). State is the common UCA flag set plus
  MY_CS_CSSORT. Tailoring is da_cldr_30 and coll_param points at
  no_coll_param. Weights come from my_uca_v900, comparison uses 3 levels,
  and the last field is NO_PAD.
*/
11520 CHARSET_INFO my_charset_utf8mb4_nb_0900_as_cs = {
11521 311,
11522 0,
11523 0, /* number */
11524 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11525 MY_UTF8MB4, /* csname */
11526 MY_UTF8MB4 "_nb_0900_as_cs", /* name */
11527 "", /* comment */
11528 da_cldr_30, /* tailoring */
11529 &no_coll_param, /* coll_param */
11530 ctype_utf8, /* ctype */
11531 nullptr, /* to_lower */
11532 nullptr, /* to_upper */
11533 nullptr, /* sort_order */
11534 &my_uca_v900, /* uca */
11535 nullptr, /* tab_to_uni */
11536 nullptr, /* tab_from_uni */
11537 &my_unicase_unicode900, /* caseinfo */
11538 nullptr, /* state_map */
11539 nullptr, /* ident_map */
11540 0, /* strxfrm_multiply */
11541 1, /* caseup_multiply */
11542 1, /* casedn_multiply */
11543 1, /* mbminlen */
11544 4, /* mbmaxlen */
11545 1, /* mbmaxlenlen */
11546 9, /* min_sort_char */
11547 0x10FFFF, /* max_sort_char */
11548 ' ', /* pad char */
11549 false, /* escape_with_backslash_is_dangerous */
11550 3, /* levels_for_compare */
11551 &my_charset_utf8mb4_handler,
11552 &my_collation_uca_900_handler,
11553 NO_PAD};
11554
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _nn_0900_ai_ci (first field 312). State is the common UCA flag set only.
  Tailoring is da_cldr_30 and coll_param is nullptr. Weights come from
  my_uca_v900, comparison uses 1 level, and the last field is NO_PAD.
*/
11555 CHARSET_INFO my_charset_utf8mb4_nn_0900_ai_ci = {
11556 312,
11557 0,
11558 0, /* number */
11559 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11560 MY_UTF8MB4, /* csname */
11561 MY_UTF8MB4 "_nn_0900_ai_ci", /* name */
11562 "", /* comment */
11563 da_cldr_30, /* tailoring */
11564 nullptr, /* coll_param */
11565 ctype_utf8, /* ctype */
11566 nullptr, /* to_lower */
11567 nullptr, /* to_upper */
11568 nullptr, /* sort_order */
11569 &my_uca_v900, /* uca */
11570 nullptr, /* tab_to_uni */
11571 nullptr, /* tab_from_uni */
11572 &my_unicase_unicode900, /* caseinfo */
11573 nullptr, /* state_map */
11574 nullptr, /* ident_map */
11575 0, /* strxfrm_multiply */
11576 1, /* caseup_multiply */
11577 1, /* casedn_multiply */
11578 1, /* mbminlen */
11579 4, /* mbmaxlen */
11580 1, /* mbmaxlenlen */
11581 9, /* min_sort_char */
11582 0x10FFFF, /* max_sort_char */
11583 ' ', /* pad char */
11584 false, /* escape_with_backslash_is_dangerous */
11585 1, /* levels_for_compare */
11586 &my_charset_utf8mb4_handler,
11587 &my_collation_uca_900_handler,
11588 NO_PAD};
11589
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _nn_0900_as_cs (first field 313). State is the common UCA flag set plus
  MY_CS_CSSORT. Tailoring is da_cldr_30 and coll_param points at
  no_coll_param. Weights come from my_uca_v900, comparison uses 3 levels,
  and the last field is NO_PAD.
*/
11590 CHARSET_INFO my_charset_utf8mb4_nn_0900_as_cs = {
11591 313,
11592 0,
11593 0, /* number */
11594 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11595 MY_UTF8MB4, /* csname */
11596 MY_UTF8MB4 "_nn_0900_as_cs", /* name */
11597 "", /* comment */
11598 da_cldr_30, /* tailoring */
11599 &no_coll_param, /* coll_param */
11600 ctype_utf8, /* ctype */
11601 nullptr, /* to_lower */
11602 nullptr, /* to_upper */
11603 nullptr, /* sort_order */
11604 &my_uca_v900, /* uca */
11605 nullptr, /* tab_to_uni */
11606 nullptr, /* tab_from_uni */
11607 &my_unicase_unicode900, /* caseinfo */
11608 nullptr, /* state_map */
11609 nullptr, /* ident_map */
11610 0, /* strxfrm_multiply */
11611 1, /* caseup_multiply */
11612 1, /* casedn_multiply */
11613 1, /* mbminlen */
11614 4, /* mbmaxlen */
11615 1, /* mbmaxlenlen */
11616 9, /* min_sort_char */
11617 0x10FFFF, /* max_sort_char */
11618 ' ', /* pad char */
11619 false, /* escape_with_backslash_is_dangerous */
11620 3, /* levels_for_compare */
11621 &my_charset_utf8mb4_handler,
11622 &my_collation_uca_900_handler,
11623 NO_PAD};
11624
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _sr_latn_0900_ai_ci (first field 314). State is the common UCA flag set
  only. Tailoring is hr_cldr_30 and coll_param points at hr_coll_param.
  Weights come from my_uca_v900, comparison uses 1 level, and the last
  field is NO_PAD.
*/
11625 CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_ai_ci = {
11626 314,
11627 0,
11628 0, /* number */
11629 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11630 MY_UTF8MB4, /* csname */
11631 MY_UTF8MB4 "_sr_latn_0900_ai_ci", /* name */
11632 "", /* comment */
11633 hr_cldr_30, /* tailoring */
11634 &hr_coll_param, /* coll_param */
11635 ctype_utf8, /* ctype */
11636 nullptr, /* to_lower */
11637 nullptr, /* to_upper */
11638 nullptr, /* sort_order */
11639 &my_uca_v900, /* uca */
11640 nullptr, /* tab_to_uni */
11641 nullptr, /* tab_from_uni */
11642 &my_unicase_unicode900, /* caseinfo */
11643 nullptr, /* state_map */
11644 nullptr, /* ident_map */
11645 0, /* strxfrm_multiply */
11646 1, /* caseup_multiply */
11647 1, /* casedn_multiply */
11648 1, /* mbminlen */
11649 4, /* mbmaxlen */
11650 1, /* mbmaxlenlen */
11651 9, /* min_sort_char */
11652 0x10FFFF, /* max_sort_char */
11653 ' ', /* pad char */
11654 false, /* escape_with_backslash_is_dangerous */
11655 1, /* levels_for_compare */
11656 &my_charset_utf8mb4_handler,
11657 &my_collation_uca_900_handler,
11658 NO_PAD};
11659
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _sr_latn_0900_as_cs (first field 315). State is the common UCA flag set
  plus MY_CS_CSSORT. Tailoring is hr_cldr_30 and coll_param points at
  hr_coll_param. Weights come from my_uca_v900, comparison uses 3 levels,
  and the last field is NO_PAD.
*/
11660 CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_as_cs = {
11661 315,
11662 0,
11663 0, /* number */
11664 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11665 MY_UTF8MB4, /* csname */
11666 MY_UTF8MB4 "_sr_latn_0900_as_cs", /* name */
11667 "", /* comment */
11668 hr_cldr_30, /* tailoring */
11669 &hr_coll_param, /* coll_param */
11670 ctype_utf8, /* ctype */
11671 nullptr, /* to_lower */
11672 nullptr, /* to_upper */
11673 nullptr, /* sort_order */
11674 &my_uca_v900, /* uca */
11675 nullptr, /* tab_to_uni */
11676 nullptr, /* tab_from_uni */
11677 &my_unicase_unicode900, /* caseinfo */
11678 nullptr, /* state_map */
11679 nullptr, /* ident_map */
11680 0, /* strxfrm_multiply */
11681 1, /* caseup_multiply */
11682 1, /* casedn_multiply */
11683 1, /* mbminlen */
11684 4, /* mbmaxlen */
11685 1, /* mbmaxlenlen */
11686 9, /* min_sort_char */
11687 0x10FFFF, /* max_sort_char */
11688 ' ', /* pad char */
11689 false, /* escape_with_backslash_is_dangerous */
11690 3, /* levels_for_compare */
11691 &my_charset_utf8mb4_handler,
11692 &my_collation_uca_900_handler,
11693 NO_PAD};
11694
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _bs_0900_ai_ci (first field 316). State is the common UCA flag set only.
  Tailoring is hr_cldr_30 and coll_param points at hr_coll_param. Weights
  come from my_uca_v900, comparison uses 1 level, and the last field is
  NO_PAD.
*/
11695 CHARSET_INFO my_charset_utf8mb4_bs_0900_ai_ci = {
11696 316,
11697 0,
11698 0, /* number */
11699 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11700 MY_UTF8MB4, /* csname */
11701 MY_UTF8MB4 "_bs_0900_ai_ci", /* name */
11702 "", /* comment */
11703 hr_cldr_30, /* tailoring */
11704 &hr_coll_param, /* coll_param */
11705 ctype_utf8, /* ctype */
11706 nullptr, /* to_lower */
11707 nullptr, /* to_upper */
11708 nullptr, /* sort_order */
11709 &my_uca_v900, /* uca */
11710 nullptr, /* tab_to_uni */
11711 nullptr, /* tab_from_uni */
11712 &my_unicase_unicode900, /* caseinfo */
11713 nullptr, /* state_map */
11714 nullptr, /* ident_map */
11715 0, /* strxfrm_multiply */
11716 1, /* caseup_multiply */
11717 1, /* casedn_multiply */
11718 1, /* mbminlen */
11719 4, /* mbmaxlen */
11720 1, /* mbmaxlenlen */
11721 9, /* min_sort_char */
11722 0x10FFFF, /* max_sort_char */
11723 ' ', /* pad char */
11724 false, /* escape_with_backslash_is_dangerous */
11725 1, /* levels_for_compare */
11726 &my_charset_utf8mb4_handler,
11727 &my_collation_uca_900_handler,
11728 NO_PAD};
11729
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _bs_0900_as_cs (first field 317). State is the common UCA flag set plus
  MY_CS_CSSORT. Tailoring is hr_cldr_30 and coll_param points at
  hr_coll_param. Weights come from my_uca_v900, comparison uses 3 levels,
  and the last field is NO_PAD.
*/
11730 CHARSET_INFO my_charset_utf8mb4_bs_0900_as_cs = {
11731 317,
11732 0,
11733 0, /* number */
11734 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11735 MY_UTF8MB4, /* csname */
11736 MY_UTF8MB4 "_bs_0900_as_cs", /* name */
11737 "", /* comment */
11738 hr_cldr_30, /* tailoring */
11739 &hr_coll_param, /* coll_param */
11740 ctype_utf8, /* ctype */
11741 nullptr, /* to_lower */
11742 nullptr, /* to_upper */
11743 nullptr, /* sort_order */
11744 &my_uca_v900, /* uca */
11745 nullptr, /* tab_to_uni */
11746 nullptr, /* tab_from_uni */
11747 &my_unicase_unicode900, /* caseinfo */
11748 nullptr, /* state_map */
11749 nullptr, /* ident_map */
11750 0, /* strxfrm_multiply */
11751 1, /* caseup_multiply */
11752 1, /* casedn_multiply */
11753 1, /* mbminlen */
11754 4, /* mbmaxlen */
11755 1, /* mbmaxlenlen */
11756 9, /* min_sort_char */
11757 0x10FFFF, /* max_sort_char */
11758 ' ', /* pad char */
11759 false, /* escape_with_backslash_is_dangerous */
11760 3, /* levels_for_compare */
11761 &my_charset_utf8mb4_handler,
11762 &my_collation_uca_900_handler,
11763 NO_PAD};
11764
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _bg_0900_ai_ci (first field 318). State is the common UCA flag set only.
  The tailoring string is empty and coll_param points at ru_coll_param.
  Weights come from my_uca_v900, comparison uses 1 level, and the last
  field is NO_PAD.
*/
11765 CHARSET_INFO my_charset_utf8mb4_bg_0900_ai_ci = {
11766 318,
11767 0,
11768 0, /* number */
11769 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11770 MY_UTF8MB4, /* csname */
11771 MY_UTF8MB4 "_bg_0900_ai_ci", /* name */
11772 "", /* comment */
11773 "", /* tailoring */
11774 &ru_coll_param, /* coll_param */
11775 ctype_utf8, /* ctype */
11776 nullptr, /* to_lower */
11777 nullptr, /* to_upper */
11778 nullptr, /* sort_order */
11779 &my_uca_v900, /* uca */
11780 nullptr, /* tab_to_uni */
11781 nullptr, /* tab_from_uni */
11782 &my_unicase_unicode900, /* caseinfo */
11783 nullptr, /* state_map */
11784 nullptr, /* ident_map */
11785 0, /* strxfrm_multiply */
11786 1, /* caseup_multiply */
11787 1, /* casedn_multiply */
11788 1, /* mbminlen */
11789 4, /* mbmaxlen */
11790 1, /* mbmaxlenlen */
11791 32, /* min_sort_char */
11792 0x10FFFF, /* max_sort_char */
11793 ' ', /* pad char */
11794 false, /* escape_with_backslash_is_dangerous */
11795 1, /* levels_for_compare */
11796 &my_charset_utf8mb4_handler,
11797 &my_collation_uca_900_handler,
11798 NO_PAD};
11799
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _bg_0900_as_cs (first field 319). State is the common UCA flag set plus
  MY_CS_CSSORT. The tailoring string is empty and coll_param points at
  ru_coll_param. Weights come from my_uca_v900, comparison uses 3 levels,
  and the last field is NO_PAD.
*/
11800 CHARSET_INFO my_charset_utf8mb4_bg_0900_as_cs = {
11801 319,
11802 0,
11803 0, /* number */
11804 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11805 MY_UTF8MB4, /* csname */
11806 MY_UTF8MB4 "_bg_0900_as_cs", /* name */
11807 "", /* comment */
11808 "", /* tailoring */
11809 &ru_coll_param, /* coll_param */
11810 ctype_utf8, /* ctype */
11811 nullptr, /* to_lower */
11812 nullptr, /* to_upper */
11813 nullptr, /* sort_order */
11814 &my_uca_v900, /* uca */
11815 nullptr, /* tab_to_uni */
11816 nullptr, /* tab_from_uni */
11817 &my_unicase_unicode900, /* caseinfo */
11818 nullptr, /* state_map */
11819 nullptr, /* ident_map */
11820 0, /* strxfrm_multiply */
11821 1, /* caseup_multiply */
11822 1, /* casedn_multiply */
11823 1, /* mbminlen */
11824 4, /* mbmaxlen */
11825 1, /* mbmaxlenlen */
11826 32, /* min_sort_char */
11827 0x10FFFF, /* max_sort_char */
11828 ' ', /* pad char */
11829 false, /* escape_with_backslash_is_dangerous */
11830 3, /* levels_for_compare */
11831 &my_charset_utf8mb4_handler,
11832 &my_collation_uca_900_handler,
11833 NO_PAD};
11834
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _gl_0900_ai_ci (first field 320). State is the common UCA flag set only.
  Tailoring is the rule string named spanish and coll_param is nullptr.
  Weights come from my_uca_v900, comparison uses 1 level, and the last
  field is NO_PAD.
*/
11835 CHARSET_INFO my_charset_utf8mb4_gl_0900_ai_ci = {
11836 320,
11837 0,
11838 0, /* number */
11839 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11840 MY_UTF8MB4, /* csname */
11841 MY_UTF8MB4 "_gl_0900_ai_ci", /* name */
11842 "", /* comment */
11843 spanish, /* tailoring */
11844 nullptr, /* coll_param */
11845 ctype_utf8, /* ctype */
11846 nullptr, /* to_lower */
11847 nullptr, /* to_upper */
11848 nullptr, /* sort_order */
11849 &my_uca_v900, /* uca */
11850 nullptr, /* tab_to_uni */
11851 nullptr, /* tab_from_uni */
11852 &my_unicase_unicode900, /* caseinfo */
11853 nullptr, /* state_map */
11854 nullptr, /* ident_map */
11855 0, /* strxfrm_multiply */
11856 1, /* caseup_multiply */
11857 1, /* casedn_multiply */
11858 1, /* mbminlen */
11859 4, /* mbmaxlen */
11860 1, /* mbmaxlenlen */
11861 9, /* min_sort_char */
11862 0x10FFFF, /* max_sort_char */
11863 ' ', /* pad char */
11864 false, /* escape_with_backslash_is_dangerous */
11865 1, /* levels_for_compare */
11866 &my_charset_utf8mb4_handler,
11867 &my_collation_uca_900_handler,
11868 NO_PAD};
11869
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _gl_0900_as_cs (first field 321). State is the common UCA flag set plus
  MY_CS_CSSORT. Tailoring is the rule string named spanish and coll_param
  is nullptr. Weights come from my_uca_v900, comparison uses 3 levels, and
  the last field is NO_PAD.
*/
11870 CHARSET_INFO my_charset_utf8mb4_gl_0900_as_cs = {
11871 321,
11872 0,
11873 0, /* number */
11874 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11875 MY_UTF8MB4, /* csname */
11876 MY_UTF8MB4 "_gl_0900_as_cs", /* name */
11877 "", /* comment */
11878 spanish, /* tailoring */
11879 nullptr, /* coll_param */
11880 ctype_utf8, /* ctype */
11881 nullptr, /* to_lower */
11882 nullptr, /* to_upper */
11883 nullptr, /* sort_order */
11884 &my_uca_v900, /* uca */
11885 nullptr, /* tab_to_uni */
11886 nullptr, /* tab_from_uni */
11887 &my_unicase_unicode900, /* caseinfo */
11888 nullptr, /* state_map */
11889 nullptr, /* ident_map */
11890 0, /* strxfrm_multiply */
11891 1, /* caseup_multiply */
11892 1, /* casedn_multiply */
11893 1, /* mbminlen */
11894 4, /* mbmaxlen */
11895 1, /* mbmaxlenlen */
11896 9, /* min_sort_char */
11897 0x10FFFF, /* max_sort_char */
11898 ' ', /* pad char */
11899 false, /* escape_with_backslash_is_dangerous */
11900 3, /* levels_for_compare */
11901 &my_charset_utf8mb4_handler,
11902 &my_collation_uca_900_handler,
11903 NO_PAD};
11904
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _mn_cyrl_0900_ai_ci (first field 322). State is the common UCA flag set
  only. The tailoring string is empty and coll_param points at
  ru_coll_param. Weights come from my_uca_v900, comparison uses 1 level,
  and the last field is NO_PAD.
*/
11905 CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_ai_ci = {
11906 322,
11907 0,
11908 0, /* number */
11909 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11910 MY_UTF8MB4, /* csname */
11911 MY_UTF8MB4 "_mn_cyrl_0900_ai_ci", /* name */
11912 "", /* comment */
11913 "", /* tailoring */
11914 &ru_coll_param, /* coll_param */
11915 ctype_utf8, /* ctype */
11916 nullptr, /* to_lower */
11917 nullptr, /* to_upper */
11918 nullptr, /* sort_order */
11919 &my_uca_v900, /* uca */
11920 nullptr, /* tab_to_uni */
11921 nullptr, /* tab_from_uni */
11922 &my_unicase_unicode900, /* caseinfo */
11923 nullptr, /* state_map */
11924 nullptr, /* ident_map */
11925 0, /* strxfrm_multiply */
11926 1, /* caseup_multiply */
11927 1, /* casedn_multiply */
11928 1, /* mbminlen */
11929 4, /* mbmaxlen */
11930 1, /* mbmaxlenlen */
11931 32, /* min_sort_char */
11932 0x10FFFF, /* max_sort_char */
11933 ' ', /* pad char */
11934 false, /* escape_with_backslash_is_dangerous */
11935 1, /* levels_for_compare */
11936 &my_charset_utf8mb4_handler,
11937 &my_collation_uca_900_handler,
11938 NO_PAD};
11939
/*
  Collation descriptor for the utf8mb4 collation with name suffix
  _mn_cyrl_0900_as_cs (first field 323). State is the common UCA flag set
  plus MY_CS_CSSORT. The tailoring string is empty and coll_param points at
  ru_coll_param. Weights come from my_uca_v900, comparison uses 3 levels,
  and the last field is NO_PAD.
*/
11940 CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_as_cs = {
11941 323,
11942 0,
11943 0, /* number */
11944 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11945 MY_UTF8MB4, /* csname */
11946 MY_UTF8MB4 "_mn_cyrl_0900_as_cs", /* name */
11947 "", /* comment */
11948 "", /* tailoring */
11949 &ru_coll_param, /* coll_param */
11950 ctype_utf8, /* ctype */
11951 nullptr, /* to_lower */
11952 nullptr, /* to_upper */
11953 nullptr, /* sort_order */
11954 &my_uca_v900, /* uca */
11955 nullptr, /* tab_to_uni */
11956 nullptr, /* tab_from_uni */
11957 &my_unicase_unicode900, /* caseinfo */
11958 nullptr, /* state_map */
11959 nullptr, /* ident_map */
11960 0, /* strxfrm_multiply */
11961 1, /* caseup_multiply */
11962 1, /* casedn_multiply */
11963 1, /* mbminlen */
11964 4, /* mbmaxlen */
11965 1, /* mbmaxlenlen */
11966 32, /* min_sort_char */
11967 0x10FFFF, /* max_sort_char */
11968 ' ', /* pad char */
11969 false, /* escape_with_backslash_is_dangerous */
11970 3, /* levels_for_compare */
11971 &my_charset_utf8mb4_handler,
11972 &my_collation_uca_900_handler,
11973 NO_PAD};
11974